pytorch
diff --git a/‎test/inductor/test_unbacked_symints.py
Lines changed: 89 additions & 0 deletions b/‎test/inductor/test_unbacked_symints.py
Lines changed: 89 additions & 0 deletions
diff --git a/‎torch/_inductor/ir.py
Lines changed: 14 additions & 6 deletions b/‎torch/_inductor/ir.py
Lines changed: 14 additions & 6 deletions
diff --git a/‎torch/fx/experimental/symbolic_shapes.py
Lines changed: 29 additions & 0 deletions b/‎torch/fx/experimental/symbolic_shapes.py
Lines changed: 29 additions & 0 deletions
@@ -294,6 +294,95 @@ def fn(value, mask):
         expected = fn(*example_inputs)
         torch.testing.assert_close(actual, expected)
 
+    @dynamo_config.patch({"capture_scalar_outputs": True})
+    @parametrize("dynamic", [False, True, None])
+    def test_unbacked_slice_on_subclass(self, device, dynamic):
+        from torch.testing._internal.common_subclass import WrapperTensor
+        from torch.utils._pytree import tree_map
+
+        # NB: the error we're testing for only triggers when unbacked SymInts
+        # are created within a subclass's torch_dispatch, because they're not seen
+        # by Dynamo and thus are considered freshly-created when the subclass instance
+        # return value of the torch_dispatch is handled.
+        # Subclass forwards everything along to the single underlying dense tensor
+        # component, except for slice(), which it handles via data-dependent bounds access
+        class CustomSliceSubclass(WrapperTensor):
+            @classmethod
+            def get_wrapper_properties(cls, t, slice_bounds=None):
+                return t, {}
+
+            def __init__(self, t, slice_bounds=None):
+                self.t = t
+                self.slice_bounds = slice_bounds
+
+            def __repr__(self):
+                t_repr = repr(self.t)
+                slice_bounds_repr = repr(self.slice_bounds)
+                return f"CustomSliceSubclass({t_repr}, {slice_bounds_repr})"
+
+            def __tensor_flatten__(self):
+                return ["t", "slice_bounds"], None
+
+            @classmethod
+            def __tensor_unflatten__(
+                cls, inner_tensors, meta, outer_size, outer_stride
+            ):
+                t = inner_tensors["t"]
+                slice_bounds = inner_tensors["slice_bounds"]
+                return cls(t, slice_bounds)
+
+            @classmethod
+            def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
+                if func is torch.ops.aten.slice.Tensor:
+                    inp = args[0]
+
+                    start = inp.slice_bounds[0].item()
+                    torch._check_is_size(start)
+                    torch._check(start <= inp.size(0))
+
+                    length = (inp.slice_bounds[1] - inp.slice_bounds[0]).item()
+                    torch._check_is_size(length)
+                    torch._check(start + length <= inp.size(0))
+
+                    return CustomSliceSubclass(
+                        func(inp.t, dim=0, start=start, end=(start + length)),
+                        slice_bounds=inp.slice_bounds,
+                    )
+
+                if not all(issubclass(cls, t) for t in types):
+                    return NotImplemented
+
+                if kwargs is None:
+                    kwargs = {}
+
+                def unwrap(e):
+                    return e.t if isinstance(e, CustomSliceSubclass) else e
+
+                def wrap(e):
+                    return CustomSliceSubclass(e) if isinstance(e, torch.Tensor) else e
+
+                # Default path: run func on the unwrapped tensors and re-wrap tensor results.
+                return tree_map(
+                    wrap,
+                    func(*tree_map(unwrap, args), **tree_map(unwrap, kwargs)),
+                )
+
+        def fn(t, start, length):
+            return torch.ops.aten.slice.Tensor(
+                t, dim=0, start=start, end=start + length
+            )
+
+        t = make_tensor(22, 5, dtype=torch.float32, device=device)
+        sub = CustomSliceSubclass(t, slice_bounds=torch.tensor([2, 5], device=t.device))
+        # Matches slice_bounds [2, 5]; the subclass's slice handler reads only slice_bounds.
+        start = 2
+        length = 3
+        example_inputs = (sub, start, length)
+
+        actual = torch.compile(fn, dynamic=dynamic, fullgraph=True)(*example_inputs)
+        expected = fn(*example_inputs)
+        torch.testing.assert_close(actual.t, expected.t)
+
 
 instantiate_device_type_tests(TestUnbackedSymints, globals(), allow_xpu=True)
 
 
@@ -3009,14 +3009,22 @@ def normalize_start_end(cls, x, dim, start, end):  # type: ignore[no-untyped-def
         dim_size = x.get_size()[dim]
 
         if any(free_unbacked_symbols(x) for x in (start, end, dim_size)):
-
-            def clamp(x, lower, upper):  # type: ignore[no-untyped-def]
-                return sympy.Min(sympy.Max(x, lower), upper)
-
+            min_func = sympy.Min
+            max_func = sympy.Max
         else:
+            min_func = sizevars.evaluate_min
+            max_func = sizevars.evaluate_max
 
-            def clamp(x, lower, upper):  # type: ignore[no-untyped-def]
-                return sizevars.evaluate_min(sizevars.evaluate_max(x, lower), upper)
+        def clamp(x, lower, upper):  # type: ignore[no-untyped-def]
+            # Bound x to [lower, upper], emitting Max/Min only for a bound that
+            # is not already statically known to hold.
+            # max_func/min_func were selected above (sympy or sizevars.evaluate_*).
+            bounded = x
+            if not sizevars.statically_known_geq(bounded, lower):
+                bounded = max_func(bounded, lower)
+            if not sizevars.statically_known_leq(bounded, upper):
+                bounded = min_func(bounded, upper)
+            return bounded
 
         def clamp_wrap(val, lower, upper, default):  # type: ignore[no-untyped-def]
             if val is None:
 
@@ -1027,6 +1027,35 @@ def free_unbacked_symbols_with_path(
                 assert isinstance(real, int)
                 shape_env.set_unbacked_var_to_val(rhs, real // int(lhs))
             pending.remove(rhs)
+        # Like the previous branch, but for unbacked SymInt * backed SymInt, e.g. s1*u0
+        elif (
+            isinstance(a, torch.SymInt)
+            and isinstance(s := a.node._expr, sympy.Mul)
+            and len(s.args) == 2
+            and isinstance(lhs := s.args[0], sympy.Symbol)
+            and isinstance(rhs := s.args[1], sympy.Symbol)
+            and ((rhs in pending) ^ (lhs in pending))
+            and (
+                (rhs in a.node.shape_env.var_to_val)
+                ^ (lhs in a.node.shape_env.var_to_val)
+            )
+        ):
+            unbacked, backed = (lhs, rhs) if lhs in pending else (rhs, lhs)
+            # NB: We need a SymInt to pass to DivideByKey.
+            # TODO: Is it really necessary to construct the SymInt here or can we get it
+            # from somewhere else?
+            key = DivideByKey(
+                a.node.shape_env.create_symintnode(
+                    backed,
+                    hint=int(a.node.shape_env.var_to_val[backed]),
+                    source=a.node.shape_env.var_to_sources.get(backed, [None])[0],
+                )
+            )
+            r[unbacked] = path + (key,)
+            if real is not None:
+                assert isinstance(real, int)
+                shape_env.set_unbacked_var_to_val(unbacked, CleanDiv(real, backed))
+            pending.remove(unbacked)
         # The annoyance here arises from the fact that SymBool is
         # allocated by allocating a SymInt and then testing if it's equal
         # to one.  So you have a complicated binding site logic for this.