Update on "[multigraph] use specializations in compile_and_call_fx_graph" · pytorch/pytorch@96c5fae · GitHub

Commit 96c5fae

Update on "[multigraph] use specializations in compile_and_call_fx_graph"
The goal of this multigraph work is to enable a compiled region that has a single dynamo trace but multiple backend specializations. This work was inspired by vLLM, which does this in a somewhat hacky way: it uses a custom backend to capture a dynamo graph and then manually invokes compile_fx multiple times to get specialized graphs.

There are really two parts to this work.

**The frontend changes:**
1) We introduce an optional kwarg `specialize_on` to mark_{dynamic,unbacked} that takes in a list of specializations (see the usage sketch after this message). I debated other methods, including specifying specializations via decorators, but ultimately decided this approach was more harmonious. The big issue with decorators is the difficulty of composing well with the rest of the torch.compile ecosystem, including graph breaks, lazy initialization of variable trackers and symbolic variables, etc.

**The backend changes (this PR):**
1) We capture the backend_specialization specified in the mark_{dynamic,unbacked} API into a SymbolicContext. See changes in `/_dynamo/variables/builder.py`.
2) After we are done dynamo tracing, we lazily (more on this later) invoke `call_user_compiler` up to N + 1 times for N specializations and 1 generic graph. Under the hood this calls compile_fx, which composes nicely with both Async Compile and AOTAutogradCache. We do this by using a context manager to patch specialization-specific axioms into the ShapeEnv before invoking the user compiler.
3) When we have specializations, we install a lazy specialized dispatch function that checks each specialization and dispatches to the first one that matches. Instead of doing all of the specialization compiles up front, we do the compiles lazily: the first time a specialization is invoked, we do the compilation and save it in a cache so subsequent invocations are fast. If none of the specializations match, we dispatch to the generic graph. I decided to do this over returning N different GuardedCodes because 1) it doesn't pollute the dynamo cache (e.g. if you have 8 specializations, you would hit the cache limit), and 2) it naturally incorporates the hierarchical lattice structure of the guards, since the specializations are always necessarily stricter than the generic region's guards.

I benchmarked this PR stack with #152596 and found around a 50% reduction when dispatching to the specialized regions:

![495269647_576053105510082_9189856138964956774_n](https://github.com/user-attachments/assets/66030fed-d62e-4d87-940f-aa13c99b1a73)

cc ezyang SherlockNoMad EikanWang jgong5 wenzhe-nrv voznesenskym penguinwu Guobing-Chen XiaobingSuper zhuhaozhe blzheng jiayisunx ipiszy chenyang78 kadeng muchulee8 amjames chauhang aakhundov

[ghstack-poisoned]
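For illustration, here is a minimal usage sketch of the `specialize_on` frontend, mirroring the `test_multi_graph_specialization` test touched by this commit (the comments describe the intended dispatch behavior, not API guarantees):

```python
import torch

def fn(x):
    return x * 5

a = torch.randn(5)

# Dim 0 is traced dynamically once; the backend may additionally compile
# specialized graphs for inputs that satisfy the predicates below.
torch._dynamo.mark_dynamic(
    a,
    0,
    specialize_on=[
        lambda x: x == 8,
        lambda x: x == 16,
    ],
)

compiled_fn = torch.compile(fn, backend="inductor")

compiled_fn(a)                # generic dynamic-shape graph
compiled_fn(torch.randn(8))   # lazily compiles, then dispatches to the x == 8 specialization
compiled_fn(torch.randn(16))  # lazily compiles, then dispatches to the x == 16 specialization
```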
1 parent fa363b2 commit 96c5fae

File tree: 3 files changed (+24, -75 lines)


test/dynamo/test_aot_autograd_cache.py

Lines changed: 0 additions & 43 deletions
@@ -314,49 +314,6 @@ def fn(x, y):
         self.assertEqual(counters["aot_autograd"]["autograd_cache_hit"], 1)
         self.assertEqual(counters["aot_autograd"]["autograd_cache_saved"], 1)
 
-    @inductor_config.patch("fx_graph_remote_cache", False)
-    @inductor_config.patch("fx_graph_cache", True)
-    @functorch_config.patch({"enable_autograd_cache": True})
-    def test_multi_graph_specialization(self):
-        """
-        Verify multi graph specializations all cache hit
-        """
-
-        def fn(x):
-            return x * 5
-
-        a = torch.randn(5)
-        a8 = torch.randn(8)
-        a16 = torch.randn(16)
-        torch._dynamo.mark_dynamic(
-            a,
-            0,
-            specialize_on=[
-                lambda x: x == 8,
-                lambda x: x == 16,
-            ],
-        )
-
-        compiled_fn = torch.compile(fn, backend="inductor")
-
-        # A first call should miss in the cache.
-        compiled_fn(a)
-        compiled_fn(a8)
-        compiled_fn(a16)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_miss"], 3)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_hit"], 0)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_saved"], 3)
-
-        self._clear_dynamo_and_codecache()
-
-        # A second call should hit on all 3 graphs
-        compiled_fn(a)
-        compiled_fn(a8)
-        compiled_fn(a16)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_miss"], 3)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_hit"], 3)
-        self.assertEqual(counters["aot_autograd"]["autograd_cache_saved"], 3)
-
     @inductor_config.patch("fx_graph_remote_cache", False)
     @inductor_config.patch("fx_graph_cache", True)
     @functorch_config.patch({"enable_autograd_cache": True})

torch/_dynamo/output_graph.py

Lines changed: 23 additions & 31 deletions
@@ -33,7 +33,8 @@
 import sys
 import traceback
 import weakref
-from dataclasses import dataclass, replace
+from collections.abc import Sequence
+from dataclasses import dataclass
 from typing import Any, Callable, cast, Optional, TYPE_CHECKING, Union
 
 import sympy
@@ -51,7 +52,6 @@
     CompileId,
     GlobalContextCheckpointState,
     Source,
-    tracing,
     TracingContext,
 )
 from torch._subclasses.fake_tensor import FakeTensor
@@ -1502,7 +1502,7 @@ def compile_and_call_fx_graph(self, tx, rv, root):
             self.tracing_context.fake_mode = backend_fake_mode
 
         with self.restore_global_state():
-            compiled_fn = self.call_user_compiler(gm)
+            compiled_fn = self.call_user_compiler(gm, self.example_inputs())
 
         from torch.fx._lazy_graph_module import _LazyGraphModule
 
@@ -1536,11 +1536,6 @@ def compile_and_call_fx_graph(self, tx, rv, root):
         if specializations := old_fake_mode.shape_env.specializations:
             specialization_guards = []
             specialization_cache: dict[Specialization, Callable[[Any], Any]] = {}
-            preserved_graphargs = [
-                replace(node.meta["grapharg"], _example=None)
-                for node in self.placeholders
-            ]
-            preserved_tracing_context = torch._guards.TracingContext.get()
             sources = [a.source for a in self.graphargs]
             for specialization in specializations:
                 source_index = sources.index(specialization.source)
@@ -1576,23 +1571,15 @@ def specialized_dispatch(*args, **kwargs):
                                 *args, **kwargs
                             )
 
-                        for node, grapharg, arg in zip(
-                            self.placeholders, preserved_graphargs, args
+                        with self.shape_env.patch_source_specialization(
+                            specialization.source, specialization.check_fn
                         ):
-                            node.meta["grapharg"] = replace(grapharg, _example=arg)
-
-                        with tracing(preserved_tracing_context):
-                            shape_env = (
-                                preserved_tracing_context.fake_mode.shape_env
+                            # Modify gm so AOTAutogradCache key changes per specialization
+                            gm.meta["specialization"] = specialization
+                            specialization_cache[specialization] = (
+                                self.call_user_compiler(gm, args)
                             )
-                            with shape_env.patch_source_specialization(
-                                specialization.source, specialization.check_fn
-                            ):
-                                # Modify gm so AOTAutogradCache key changes per specialization
-                                gm.meta["specialization"] = specialization
-                                specialization_cache[specialization] = (
-                                    self.call_user_compiler(gm)
-                                )
+
                         return specialization_cache[specialization](*args, **kwargs)
                 return compiled_fn(*args, **kwargs)
 
@@ -1612,16 +1599,20 @@ def placeholders(self) -> list[fx.Node]:
     def graphargs(self) -> list[GraphArg]:
         return [node.meta["grapharg"] for node in self.placeholders]
 
-    def call_user_compiler(self, gm: fx.GraphModule) -> CompiledFn:
+    def call_user_compiler(
+        self, gm: fx.GraphModule, example_inputs: Sequence[Any]
+    ) -> CompiledFn:
         with dynamo_timed(
             "OutputGraph.call_user_compiler",
             phase_name="backend_compile",
             log_pt2_compile_event=True,
             dynamo_compile_column_us="aot_autograd_cumulative_compile_time_us",
         ):
-            return self._call_user_compiler(gm)
+            return self._call_user_compiler(gm, example_inputs)
 
-    def _call_user_compiler(self, gm: fx.GraphModule) -> CompiledFn:
+    def _call_user_compiler(
+        self, gm: fx.GraphModule, example_inputs: Sequence[Any]
+    ) -> CompiledFn:
         assert self.compiler_fn is not None
         tot = 0
         placeholders = []
@@ -1632,10 +1623,11 @@ def _call_user_compiler(self, gm: fx.GraphModule) -> CompiledFn:
                 placeholders.append(node)
         increment_op_count(tot)
         for pl in placeholders:
-            arg = pl.meta["grapharg"]
-            # TODO: Why isn't this stored in meta :think:
-            # NOTE: can't move these into meta: https://github.com/pytorch/pytorch/issues/141640
-            pl._dynamo_source = arg.source
+            if not hasattr(pl, "_dynamo_source"):
+                arg = pl.meta["grapharg"]
+                # TODO: Why isn't this stored in meta :think:
+                # NOTE: can't move these into meta: https://github.com/pytorch/pytorch/issues/141640
+                pl._dynamo_source = arg.source
 
         # NOTE: can't move these into meta: https://github.com/pytorch/pytorch/issues/141640
         gm._param_name_to_source = self.param_name_to_source  # type: ignore[assignment]
@@ -1651,7 +1643,7 @@ def _call_user_compiler(self, gm: fx.GraphModule) -> CompiledFn:
             compiler_fn = self.compiler_fn
             if config.verify_correctness:
                 compiler_fn = WrapperBackend(compiler_fn)
-            compiled_fn = compiler_fn(gm, self.example_inputs())
+            compiled_fn = compiler_fn(gm, example_inputs)
             _step_logger()(logging.INFO, f"done compiler function {name}")
             assert callable(compiled_fn), "compiler_fn did not return callable"
         except (TensorifyScalarRestartAnalysis, ShortenTraceback):
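As a reading aid for the `specialized_dispatch` hunk above, here is a simplified, standalone sketch of the same check-then-compile-and-cache pattern. `compile_for` and `generic_fn` are hypothetical stand-ins for `self.call_user_compiler(gm, args)` and the generic `compiled_fn`; the real code also patches specialization axioms into the ShapeEnv before compiling:

```python
from typing import Any, Callable

def make_specialized_dispatch(
    specializations: list[Any],
    compile_for: Callable[..., Callable],
    generic_fn: Callable,
) -> Callable:
    # Compile each specialization only on its first use and memoize the result.
    cache: dict[Any, Callable] = {}

    def dispatch(*args, **kwargs):
        for spec in specializations:
            if spec.check_fn(args):
                if spec not in cache:
                    # First hit for this specialization: compile and cache it.
                    cache[spec] = compile_for(spec, args)
                return cache[spec](*args, **kwargs)
        # No specialization matched: run the generic (dynamic-shape) graph.
        return generic_fn(*args, **kwargs)

    return dispatch
```

The first call that matches a given predicate pays the compile cost; later matching calls hit the cache, and inputs that match nothing fall through to the generic graph, which is the lazy behavior described in the commit message.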

torch/fx/experimental/symbolic_shapes.py

Lines changed: 1 addition & 1 deletion
@@ -3634,7 +3634,7 @@ def patch_source_specialization(
         """
         name = source.name()
         sym = self.source_to_var[name]
-        expr = check_fn(sym)
+        expr = check_fn(SymInt(SymNode(sym, self, int, None))).node._expr
         new_axioms = dict(self.get_implications(self.simplify(expr)))
         self.axioms.update(new_axioms)
         try:
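One plausible reading of the one-line `symbolic_shapes.py` change above (the SymInt/SymNode wrapping is exactly what the diff shows; the motivation below is my assumption): predicates passed via `specialize_on`, such as `lambda x: x == 8`, rely on ordinary Python comparisons, which only produce a symbolic expression when the argument behaves like a `SymInt`:

```python
import sympy

s0 = sympy.Symbol("s0", integer=True)
check = lambda x: x == 8

# On a raw sympy Symbol, `==` is structural equality and simply returns False,
# so calling check_fn directly on `sym` never yields a relation that
# get_implications()/simplify() can consume.
print(check(s0))        # False
print(sympy.Eq(s0, 8))  # Eq(s0, 8) -- the kind of relation the ShapeEnv needs

# Wrapping `sym` as SymInt(SymNode(sym, shape_env, int, None)) routes `x == 8`
# through SymInt's comparison machinery, which returns a symbolic bool;
# `.node._expr` then recovers the underlying sympy expression used to patch
# the specialization axioms above.
```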
