[WIP] add envvar to bisect number of graphs compiled by eellison · Pull Request #153275 · pytorch/pytorch
Closed · wants to merge 4 commits
31 changes: 31 additions & 0 deletions test/dynamo/test_compiler_bisector.py
@@ -10,6 +10,7 @@
from torch._inductor import config
from torch._inductor.compiler_bisector import CompilerBisector
from torch._inductor.test_case import TestCase
from torch._inductor.utils import run_and_get_code
from torch.library import _scoped_library, Library
from torch.testing._internal.inductor_utils import HAS_CUDA

@@ -149,6 +150,36 @@ def test_fn():
        self.assertEqual(out.subsystem, "inductor_fallback_random")
        self.assertTrue("inductor_fallback_random" in out.debug_info)

    # TODO: incorporate into the compiler bisector
    @torch._dynamo.config.patch(debug_max_graphs=1)
    def test_bisecting_num_graphs(self):
        from torch._dynamo.utils import counters

        def foo(x):
            out = x + 3
            torch._dynamo.graph_break()
            return out * 2

        out = torch.compile(foo)(torch.ones([4], device="cuda"))
        self.assertEqual(out, foo(torch.ones([4], device="cuda")))
        self.assertEqual(counters["aot_autograd"]["total"], 1)

    # TODO: incorporate into the compiler bisector
    @torch._dynamo.config.patch(debug_max_backend_graphs=1)
    def test_bisecting_backend_graphs(self):
        from torch._dynamo.utils import counters

        def foo(x):
            out = x + 3
            torch._dynamo.graph_break()
            return out * 2

        inp = torch.ones([4], device="cuda")
        out, code = run_and_get_code(torch.compile(foo), inp)
        self.assertEqual(len(code), 1)
        self.assertEqual(counters["aot_autograd"]["total"], 2)
        self.assertEqual(out, foo(torch.ones([4], device="cuda")))

    def test_crossref(self):
        with _scoped_library(self.test_ns, "FRAGMENT") as lib:
            lib.define("foo(Tensor x) -> Tensor")
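For reference, the same limit should be reachable outside the test suite through the environment variable rather than the config patch. A minimal standalone sketch (not part of the PR), assuming `TORCH_BISECT_MAX_GRAPHS` is read when `torch._dynamo.config` is first imported and that a CUDA device is available:

```python
# Hypothetical repro mirroring test_bisecting_num_graphs, using the environment
# variable instead of @torch._dynamo.config.patch. The variable must be set
# before torch is imported, since config.py reads os.environ at import time.
import os

os.environ["TORCH_BISECT_MAX_GRAPHS"] = "1"

import torch


def foo(x):
    out = x + 3
    torch._dynamo.graph_break()  # splits foo into two dynamo graphs
    return out * 2


x = torch.ones([4], device="cuda")
out = torch.compile(foo)(x)

# With the limit at 1, the frame after the graph break is skipped (SkipFrame)
# and runs eagerly, so the result should still match eager execution.
assert torch.equal(out, foo(x))
```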
32 changes: 32 additions & 0 deletions torch/__init__.py
@@ -2289,6 +2289,32 @@ def compiled_with_cxx11_abi() -> builtins.bool:
from torch.utils.dlpack import from_dlpack, to_dlpack


def check_max_graphs() -> _Optional[builtins.bool]:
    """
    If we have hit a user-specified maximum number of graphs, skip this frame.

    Otherwise, return whether we have hit the maximum number of graphs for the
    given backend, in which case the caller falls back to aot_eager.
    """
    from torch._dynamo.utils import GraphsCompiledState

    max_compiled_graphs = torch._dynamo.config.debug_max_graphs
    max_backend_graphs = torch._dynamo.config.debug_max_backend_graphs
    if max_compiled_graphs is None and max_backend_graphs is None:
        return None

    GraphsCompiledState.increment()
    num_graphs = GraphsCompiledState.get_num_graphs()
    if max_compiled_graphs is not None and num_graphs > builtins.int(
        max_compiled_graphs
    ):
        raise torch._dynamo.exc.SkipFrame(f"Hit max graph limit: {max_compiled_graphs}")

    return max_backend_graphs is not None and num_graphs > builtins.int(
        max_backend_graphs
    )


class _TorchCompileInductorWrapper:
    compiler_name = "inductor"

@@ -2361,6 +2387,11 @@ def apply_options(self, options: _Optional[dict[str, _Any]]):
    def __call__(self, model_, inputs_):
        from torch._inductor.compile_fx import compile_fx

        if check_max_graphs():
            # Backend-graph limit hit: compile the remaining graphs with
            # aot_eager instead of inductor.
            return _TorchCompileWrapper(
                "aot_eager", "default", {}, self.dynamic
            ).__call__(model_, inputs_)

        return compile_fx(model_, inputs_, config_patches=self.config)

    def get_compiler_config(self):
@@ -2406,6 +2437,7 @@ def __eq__(self, other):
)

    def __call__(self, model_, inputs_):
        # May raise SkipFrame once the max-graph limit is hit; the
        # backend-graph return value is unused for non-inductor backends.
        check_max_graphs()
        return self.compiler_fn(model_, inputs_, **self.kwargs)

    def reset(self):
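Because the backend-graph limit routes the remaining graphs to aot_eager, it lends itself to a simple bisection loop. A sketch of one possible driver (not part of this PR; `repro.py` and the upper bound of 64 graphs are placeholders for a user-supplied failing script and a known graph count):

```python
# Hypothetical bisection driver over TORCH_BISECT_MAX_BACKEND_GRAPHS: find the
# smallest number of inductor-compiled graphs that still reproduces a failure,
# with the remaining graphs compiled via aot_eager by the wrapper above.
import os
import subprocess


def fails_with_limit(limit: int) -> bool:
    # repro.py is assumed to exit non-zero when the failure reproduces.
    env = dict(os.environ, TORCH_BISECT_MAX_BACKEND_GRAPHS=str(limit))
    return subprocess.run(["python", "repro.py"], env=env).returncode != 0


lo, hi = 0, 64  # 64: assumed upper bound on the number of graphs
while lo < hi:
    mid = (lo + hi) // 2
    if fails_with_limit(mid):
        hi = mid  # failure still reproduces with only `mid` inductor graphs
    else:
        lo = mid + 1  # need more inductor-compiled graphs to hit the failure
print(f"first suspect inductor-compiled graph: {lo}")
```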
9 changes: 8 additions & 1 deletion torch/_dynamo/__init__.py
@@ -48,7 +48,13 @@
from .mutation_guard import GenerationTracker
from .pgo import reset_code_state
from .symbolic_convert import TensorifyState
from .utils import graph_break_reasons, guard_failures, orig_code_map, reset_frame_count
from .utils import (
    graph_break_reasons,
    GraphsCompiledState,
    guard_failures,
    orig_code_map,
    reset_frame_count,
)


# Register polyfill functions
@@ -131,6 +137,7 @@ def reset() -> None:
    callback_handler.clear()
    GenerationTracker.clear()
    TensorifyState.clear()
    GraphsCompiledState.clear()
    torch._dynamo.utils.warn_once_cache.clear()
    torch._dynamo.utils.user_obj_id_to_weakref.clear()
    torch._C._autograd._saved_tensors_hooks_set_tracing(False)
8 changes: 8 additions & 0 deletions torch/_dynamo/config.py
@@ -243,6 +243,14 @@
# [@compile_ignored: runtime_behaviour]
same_two_models_use_fp64 = True

# maximum number of dynamo graphs to compile.
# if we exceed this limit, we will raise a SkipFrame
debug_max_graphs = os.environ.get("TORCH_BISECT_MAX_GRAPHS", None)

# maximum number of dynamo graphs to invoke with the compiled backend.
# if we exceed this limit, we will defer to aot_eager
debug_max_backend_graphs = os.environ.get("TORCH_BISECT_MAX_BACKEND_GRAPHS", None)
Contributor:
I'll wait for a second reviewer to agree on bikeshedding, but a few thoughts:

  • On having two configs: separate configs for "run the first X graphs through dynamo and fall the rest back to eager", vs. "of the graphs that go through dynamo, run the first Y through inductor and have the rest go through a simple backend" seems ok. I guess I'm not convinced that it's necessary, but having both knobs available for debugging seems useful

  • for the second config, at first I was surprised that you hardcoded the "fallback" backend to aot_eager, but if we're thinking about it as an extra option mainly to find bugs in inductor then it does sound relatively reasonable? We could have yet another config to specify the default backend but that feels like overkill. Also aot_eager_decomp_partition seems to have a few cases where it does not give the same numerics as eager due to our decomps being bad, so aot_eager seems like the safer default.

  • if this backend is mainly for "debug inductor problems", wdyt of debug_max_inductor_graphs?

Contributor Author:
@bdhirsh, sorry, I reopened since this had internal commits: #154543.

Mind commenting there?


# Not all backends support scalars. Some calls on torch.Tensor (like .item()) return a scalar type.
# When this flag is set to False, we introduce a graph break instead of capturing.
# This requires dynamic_shapes to be True.
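To make the distinction between the two knobs concrete, an illustrative sketch of their intended semantics, assuming the variables are set before `torch._dynamo.config` is imported (the config stores the raw strings; `check_max_graphs()` converts them with `int()` when comparing):

```python
# Illustrative only: how the two environment variables are expected to differ.
import os

# Frames beyond this graph count are skipped entirely (SkipFrame) and run eagerly.
os.environ["TORCH_BISECT_MAX_GRAPHS"] = "8"

# Graphs beyond this count still go through dynamo/AOTAutograd, but are
# compiled with the aot_eager backend instead of inductor.
os.environ["TORCH_BISECT_MAX_BACKEND_GRAPHS"] = "4"

import torch._dynamo.config as dynamo_config

# The config holds the raw environment strings.
assert dynamo_config.debug_max_graphs == "8"
assert dynamo_config.debug_max_backend_graphs == "4"
```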
20 changes: 20 additions & 0 deletions torch/_dynamo/utils.py
@@ -4544,6 +4544,26 @@ def record(cls):
cls.end()


class GraphsCompiledState:
    """
    Tracks number of compiled graphs.
    """

    num_graphs: int = 0

    @classmethod
    def clear(cls) -> None:
        cls.num_graphs = 0

    @classmethod
    def increment(cls) -> None:
        cls.num_graphs += 1

    @classmethod
    def get_num_graphs(cls) -> int:
        return cls.num_graphs


def set_feature_use(feature: str, usage: bool):
    """
    Records whether we are using a feature
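The counter added above is process-global class state; a short sketch of its expected lifecycle, given that `torch._dynamo.reset()` now clears it alongside the other per-process caches:

```python
# Minimal sketch of the counter lifecycle, based on the clear() hook added to
# torch._dynamo.reset() in this PR.
import torch
from torch._dynamo.utils import GraphsCompiledState

GraphsCompiledState.increment()
GraphsCompiledState.increment()
assert GraphsCompiledState.get_num_graphs() == 2

torch._dynamo.reset()  # reset() now also clears the graph counter
assert GraphsCompiledState.get_num_graphs() == 0
```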