pytorch
diff --git a/torch/_prims_common/__init__.py b/torch/_prims_common/__init__.py
Lines changed: 16 additions & 2 deletions
@@ -542,7 +542,10 @@ def is_non_overlapping_and_dense(a: Tensor) -> bool:
 def compute_elementwise_output_logical_to_physical_perm(
     *tensors, _skip_checks=False
 ) -> list[int]:
-    from torch.fx.experimental.symbolic_shapes import guard_size_oblivious
+    from torch.fx.experimental.symbolic_shapes import (
+        guard_or_false,
+        guard_size_oblivious,
+    )
 
     if not _skip_checks and len(tensors) == 0:
         msg = "Can't compute elementwise output strides for zero tensors!"
@@ -595,12 +598,23 @@ def should_swap(idx_a, idx_b):
         for tensor in tensors:
             stride_a = tensor.stride()[idx_a]
             stride_b = tensor.stride()[idx_b]
-
             if guard_size_oblivious(stride_a == 0) or guard_size_oblivious(
                 stride_b == 0
             ):
                 continue
 
+            if guard_or_false(stride_a == stride_b):
+                if guard_size_oblivious(shape[idx_a] > shape[idx_b]):
+                    return 1
+
+            # when stride_a = 1, we want stride_a < stride_b to be TRUE
+            # when stride_b = 1, we want stride_a < stride_b to be FALSE
+            if guard_or_false(stride_a == 1):
+                return -1
+
+            if guard_or_false(stride_b == 1):
+                return 1
+
             if guard_size_oblivious(stride_a < stride_b):
                 return -1