pytorch
diff --git a/torch/_prims_common/__init__.py b/torch/_prims_common/__init__.py
Lines changed: 22 additions & 3 deletions
@@ -542,7 +542,10 @@ def is_non_overlapping_and_dense(a: Tensor) -> bool:
 def compute_elementwise_output_logical_to_physical_perm(
     *tensors, _skip_checks=False
 ) -> list[int]:
-    from torch.fx.experimental.symbolic_shapes import guard_size_oblivious
+    from torch.fx.experimental.symbolic_shapes import (
+        guard_or_false,
+        guard_size_oblivious,
+    )
 
     if not _skip_checks and len(tensors) == 0:
         msg = "Can't compute elementwise output strides for zero tensors!"
@@ -601,10 +604,26 @@ def should_swap(idx_a, idx_b):
             ):
                 continue
 
-            if guard_size_oblivious(stride_a < stride_b):
+            # Imitates `if stride_a < stride_b: return -1`, except that
+            # when stride_a == 1 we treat stride_a < stride_b as True, and
+            # when stride_b == 1 we treat stride_a < stride_b as False.
+            if guard_or_false(stride_a == 1):
                 return -1
 
-            if guard_size_oblivious(stride_a > stride_b):
+            if not guard_or_false(stride_b == 1) and guard_size_oblivious(
+                stride_a < stride_b
+            ):
+                return -1
+
+            # Imitates `if stride_a > stride_b: return 1`, except that
+            # when stride_b == 1 we treat stride_a > stride_b as True, and
+            # when stride_a == 1 we treat stride_a > stride_b as False.
+            if guard_or_false(stride_b == 1):
+                return 1
+
+            if not guard_or_false(stride_a == 1) and guard_size_oblivious(
+                stride_a > stride_b
+            ):
                 return 1
 
             # stride_a == stride_b