8000 add guard_else_true, guard_else_false and guard_size_oblivious in dec… · pytorch/pytorch@086987e · GitHub
[go: up one dir, main page]

Skip to content

Commit 086987e

Browse files
committed
add guard_else_true, guard_else_false and guard_size_oblivious in decompositions.py
ghstack-source-id: 424fdbf Pull Request resolved: #148430
1 parent 0bd2caa commit 086987e

File tree

2 files changed

+41
-16
lines changed

2 files changed

+41
-16
lines changed

torch/_inductor/decomposition.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@
3131
ELEMENTWISE_TYPE_PROMOTION_KIND,
3232
type_to_dtype,
3333
)
34-
from torch.fx.experimental.symbolic_shapes import definitely_true, guard_size_oblivious
34+
from torch.fx.experimental.symbolic_shapes import (
35+
definitely_true,
36+
guard_else_false,
37+
guard_else_true,
38+
)
3539

3640
from . import config, inductor_prims
3741
from .utils import (
@@ -261,13 +265,13 @@ def bmm(
261265
batch2: torch.Tensor,
262266
) -> torch.Tensor:
263267
if config.coordinate_descent_tuning and self.device.type != "cpu":
264-
if guard_size_oblivious(self.shape[1] == 1) or guard_size_oblivious(
268+
if guard_else_false(self.shape[1] == 1) or guard_else_false(
265269
batch2.shape[2] == 1
266270
):
267271
out = (self.unsqueeze(-1) * batch2.unsqueeze(1)).sum(dim=2)
268272
return out
269273
if self.device.type == "cpu":
270-
if guard_size_oblivious(self.size(1) == 1) and guard_size_oblivious(
274+
if guard_else_false(self.size(1) == 1) and guard_else_false(
271275
batch2.size(-1) == 1
272276
):
273277
counters["inductor"]["decompose_bmm"] += 1
@@ -287,16 +291,14 @@ def addmm(
287291
alpha: torch.types.Number = 1,
288292
) -> torch.Tensor:
289293
if self.device.type == "cpu":
290-
if guard_size_oblivious(mat1.size(0) == 1) and guard_size_oblivious(
291-
mat2.size(-1) == 1
292-
):
294+
if guard_else_false(mat1.size(0) == 1) and guard_else_false(mat2.size(-1) == 1):
293295
counters["inductor"]["decompose_addmm"] += 1
294296
out = torch.sum(
295297
mat1.squeeze(0) * mat2.squeeze(-1), dim=0, keepdim=True
296298
).unsqueeze(0)
297299
return alpha * out + beta * self
298300
if (
299-
guard_size_oblivious(mat1.size(0) == 1)
301+
guard_else_false(mat1.size(0) == 1)
300302
and definitely_true(mat2.size(0) <= 16)
301303
and definitely_true(mat2.size(1) <= 16)
302304
):
@@ -315,21 +317,21 @@ def mm(
315317
# Our matrix vector multiplies only achieve peak bandwidth with coordinate descent tuning.
316318
# todo: Look into why and fix it (hopefully)
317319
if config.coordinate_descent_tuning and self.device.type != "cpu":
318-
if guard_size_oblivious(self.shape[0] == 1) or guard_size_oblivious(
320+
if guard_else_false(self.shape[0] == 1) or guard_else_false(
319321
input2.shape[1] == 1
320322
):
321323
return (self.unsqueeze(2) * input2.unsqueeze(0)).sum(dim=1)
322324
if self.device.type == "cpu":
323325
if (
324-
guard_size_oblivious(self.size(-1) == 1)
325-
and guard_size_oblivious(self.size(0) > 0)
326-
and guard_size_oblivious(input2.size(0) == 1)
326+
guard_else_false(self.size(-1) == 1)
327+
and guard_else_true(self.size(0) > 0)
328+
and guard_else_false(input2.size(0) == 1)
327329
and (self.dtype == input2.dtype)
328330
and definitely_true((torch.numel(self) + torch.numel(input2)) <= 32)
329331
):
330332
counters["inductor"]["decompose_mm"] += 1
331333
return torch.cat([self[i, :] * input2 for i in range(self.size(0))])
332-
if guard_size_oblivious(self.size(0) == 1) and guard_size_oblivious(
334+
if guard_else_false(self.size(0) == 1) and guard_else_false(
333335
input2.size(-1) == 1
334336
):
335337
counters["inductor"]["decompose_mm"] += 1
@@ -348,8 +350,6 @@ def cat(
348350
tensors: list[torch.Tensor],
349351
dim: int = 0,
350352
) -> torch.Tensor:
351-
from torch.fx.experimental.symbolic_shapes import guard_size_oblivious
352-
353353
def non_empty_tensor(x: torch.Tensor) -> bool:
354354
# For better or worse, this is a valid cat:
355355
#
@@ -367,10 +367,10 @@ def non_empty_tensor(x: torch.Tensor) -> bool:
367367
# runtime assert forcing u0 to be zero. So if this hasn't happened,
368368
# we know that the unbacked SymInt has appropriate size and there are
369369
# no problems.
370-
if len(x.shape) == 1 and guard_size_oblivious(x.shape[0] == 0):
370+
if len(x.shape) == 1 and guard_else_false(x.shape[0] == 0):
371371
return False
372372

373-
if dim < len(x.shape) and guard_size_oblivious(x.shape[dim] == 0):
373+
if dim < len(x.shape) and guard_else_false(x.shape[dim] == 0):
374374
return False
375375

376376
return True

torch/fx/experimental/symbolic_shapes.py

+25
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,31 @@ def _symint_wrap(s: sympy.Symbol) -> SymInt:
11491149
return symbol_to_path
11501150

11511151

1152+
# This is used for size oblivious reasoning to avoid 0/1 specializations.
1153+
def guard_else_false(a: BoolLikeType) -> bool:
1154+
"""
1155+
Try to guard a; if a data-dependent error is encountered, just return False.
1156+
"""
1157+
if isinstance(a, SymBool):
1158+
try:
1159+
guard_bool(a)
1160+
except GuardOnDataDependentSymNode:
1161+
return False
1162+
return bool(a)
1163+
1164+
1165+
def guard_else_true(a: BoolLikeType) -> bool:
1166+
"""
1167+
Try to guard a; if a data-dependent error is encountered, just return True.
1168+
"""
1169+
if isinstance(a, SymBool):
1170+
try:
1171+
guard_bool(a)
1172+
except GuardOnDataDependentSymNode:
1173+
return True
1174+
return bool(a)
1175+
1176+
11521177
def definitely_true(a: BoolLikeType) -> bool:
11531178
"""
11541179
Returns True only if we can tell that a is True, possibly introducing

0 commit comments

Comments
 (0)
0