Add user annotation for FX graph cache key (#159318) · pytorch/pytorch@f1fb57d

Commit f1fb57d

shengfukevin authored and pytorchmergebot committed
Add user annotation for FX graph cache key (#159318)
Summary: The AI system co-design team requested a user annotation for the FX graph cache key in PyTorch Kineto traces and Execution traces. With this annotation, they can tell which FX graph each kernel belongs to.

Test Plan: buck2 run mode/opt caffe2/test:test_profiler_cuda -- profiler.test_execution_trace.TestExecutionTraceCUDA

Rollback Plan:

Differential Revision: D79019069

Pull Request resolved: #159318

Approved by: https://github.com/sraikund16, https://github.com/jansel
1 parent 6d0f456 commit f1fb57d
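
For context, a minimal sketch of how the new annotation can be observed once this commit is in place. The function fn, the tensor shapes, and the CPU-only profile are illustrative assumptions, not part of the commit; torch.compile, profile, and ProfilerActivity are the standard public APIs.

# Sketch (illustrative, not from this commit): run a compiled function
# under the profiler and look for the new annotation.
import torch
from torch.profiler import ProfilerActivity, profile

@torch.compile
def fn(x, y):
    return (x @ y).relu()

x, y = torch.randn(10, 10), torch.randn(10, 10)
fn(x, y)  # warm up so compilation happens before profiling

with profile(activities=[ProfilerActivity.CPU]) as prof:
    fn(x, y)

# With this commit, each run of an inductor-compiled graph is wrapped in
# an event named "## Call CompiledFxGraph <fx graph cache key> ##".
for evt in prof.events():
    if "Call CompiledFxGraph" in evt.name:
        print(evt.name)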

File tree

3 files changed (+10, -2 lines)

test/dynamo/test_profiler.py

Lines changed: 1 addition & 1 deletion

The match is tightened from "Compiled" to "Torch-Compiled" so that the new "## Call CompiledFxGraph <cache key> ##" events, whose names also contain "Compiled", do not leak into this assertion about dynamo's "Torch-Compiled Region" annotations.

@@ -181,7 +181,7 @@ def fn(x, y):
             torch.randn(10, 15),
         )

-        annotations = [e.name for e in prof.events() if "Compiled" in e.name]
+        annotations = [e.name for e in prof.events() if "Torch-Compiled" in e.name]
         self.assertEqual(
             annotations,
             [
test/profiler/test_execution_trace.py

Lines changed: 4 additions & 0 deletions

@@ -404,6 +404,7 @@ def fn(a, b, c):

         nodes = self.get_execution_trace_root(fp.name)
         found_captured_triton_kernel_node = False
+        found_call_compiled_fx_graph = False
         for n in nodes:
             assert "name" in n
             if "triton_" in n["name"]:
@@ -412,7 +413,10 @@ def fn(a, b, c):
                 found_captured_triton_kernel_node = True
                 assert len(n["inputs"]["values"]) > 0
                 assert len(n["outputs"]["values"]) == 0
+            elif "Call CompiledFxGraph" in n["name"]:
+                found_call_compiled_fx_graph = True
         assert found_captured_triton_kernel_node
+        assert found_call_compiled_fx_graph

     @unittest.skipIf(IS_WINDOWS, "torch.compile does not support WINDOWS")
     @unittest.skipIf(
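
The test above exercises the Execution Trace path. A condensed sketch of the same check outside the test harness, assuming the trace JSON exposes a top-level "nodes" list (as the helper get_execution_trace_root implies); the output path and the compiled function are placeholders.

import json

import torch
from torch.profiler import ExecutionTraceObserver

@torch.compile
def fn(a, b):
    return torch.relu(a + b)

a, b = torch.randn(4, 4), torch.randn(4, 4)
fn(a, b)  # compile before observing

et = ExecutionTraceObserver()
et.register_callback("/tmp/et.json")  # placeholder output path
et.start()
fn(a, b)
et.stop()
et.unregister_callback()

with open("/tmp/et.json") as f:
    nodes = json.load(f)["nodes"]
# With this commit, one node should carry the FX graph cache key.
assert any("Call CompiledFxGraph" in n["name"] for n in nodes)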

torch/_inductor/output_code.py

Lines changed: 5 additions & 1 deletion

@@ -48,6 +48,7 @@
     output_node,
     set_tracing_context_output_strides,
 )
+from torch.autograd.profiler import record_function
 from torch.utils._ordered_set import OrderedSet

 from . import config
@@ -581,7 +582,10 @@ def __del__(self) -> None:
     def __call__(self, inputs: Sequence[Any]) -> Any:
         assert self.current_callable is not None
         try:
-            return self.current_callable(inputs)
+            with record_function(
+                f"## Call CompiledFxGraph {self._fx_graph_cache_key} ##"
+            ):
+                return self.current_callable(inputs)
         finally:
             get_runtime_metrics_context().finish()
             AutotuneCacheBundler.end_compile()
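
The substance of the change is the record_function context manager around the compiled callable. As a standalone illustration of the same pattern, run_with_annotation and its arguments are hypothetical names for exposition; record_function is the real torch.autograd.profiler API used by the commit.

from typing import Any, Callable, Sequence

from torch.autograd.profiler import record_function

def run_with_annotation(
    cache_key: str,
    compiled: Callable[[Sequence[Any]], Any],
    inputs: Sequence[Any],
) -> Any:
    # Kernels launched inside the "with" block are recorded as children of
    # this annotation, which is what lets trace consumers map kernels back
    # to the FX graph identified by cache_key.
    with record_function(f"## Call CompiledFxGraph {cache_key} ##"):
        return compiled(inputs)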
