pytorch/pytorch · Commit 97dfd8d

[invoke_subgraph] Run missing graph passes recursively (#152675)

anijain2305 authored and pytorchmergebot committed

Pull Request resolved: #152675
Approved by: https://github.com/bdhirsh, https://github.com/zou3519
ghstack dependencies: #152772, #152770
1 parent cc254ea · commit 97dfd8d

File tree: 3 files changed, +77 −0 lines changed


test/higher_order_ops/test_invoke_subgraph.py

Lines changed: 40 additions & 0 deletions
@@ -617,6 +617,46 @@ def forward(self, a: "f32[8]", l_y_: "f32[8]"):
 """,
         )

+    def test_view_to_reshape(self):
+        @mark_compile_region
+        def gn(x):
+            x = torch.sin(x)
+            x = x.view(1, 8)
+            return torch.sin(x)
+
+        def fn(x):
+            return gn(x)
+
+        x = torch.randn(8, requires_grad=False)
+
+        torch._dynamo.reset()
+        backend = InductorAndRecordGraphs()
+        torch.compile(fn, backend=backend, fullgraph=True)(x)
+
+        if not TEST_WITH_CROSSREF:
+            self.assertExpectedInline(
+                normalize_gm(
+                    backend.inductor_graphs[0].print_readable(print_output=False)
+                ),
+                """\
+class <lambda>(torch.nn.Module):
+    def forward(self, arg0_1: "f32[8]"):
+        repeated_subgraph0 = self.repeated_subgraph0
+        invoke_subgraph = torch.ops.higher_order.invoke_subgraph(repeated_subgraph0, 'subgraph_0', arg0_1); repeated_subgraph0 = arg0_1 = None
+        getitem: "f32[1, 8]" = invoke_subgraph[0]; invoke_subgraph = None
+        return (getitem,)
+
+    class repeated_subgraph0(torch.nn.Module):
+        def forward(self, arg0_1: "f32[8]"):
+            sin: "f32[8]" = torch.ops.aten.sin.default(arg0_1); arg0_1 = None
+
+            view: "f32[1, 8]" = torch.ops.aten.reshape.default(sin, [1, 8]); sin = None
+
+            sin_1: "f32[1, 8]" = torch.ops.aten.sin.default(view); view = None
+            return (sin_1,)
+""",
+            )
+
     def test_normalize_gm(self):
         @mark_compile_region
         def gn(x, y):
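For illustration, a minimal standalone sketch (not part of this commit) of the pattern the new test compiles: a region marked with mark_compile_region whose body contains a view, which the Inductor post-grad pass must rewrite to reshape even though it lives inside the nested invoke_subgraph graph. The import path for mark_compile_region is an assumption here (the test's import block is outside this hunk), and the function names are illustrative.

import torch
from torch._higher_order_ops.invoke_subgraph import mark_compile_region

@mark_compile_region
def gn(x):
    # The view inside the compiled region ends up in the nested
    # repeated_subgraph0 graph shown in the expected output above.
    return torch.sin(x.view(1, 8))

@torch.compile(fullgraph=True)
def fn(x):
    return gn(x)

out = fn(torch.randn(8))
print(out.shape)  # expected: torch.Size([1, 8])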

torch/_inductor/fx_passes/post_grad.py

Lines changed: 8 additions & 0 deletions
@@ -1202,6 +1202,14 @@ def view_to_reshape(gm):
     """
     Replace view ops in the GraphModule to reshape ops.
     """
+    subgraph_names: OrderedSet[str] = OrderedSet(
+        x.target for x in gm.graph.find_nodes(op="get_attr")
+    )
+
+    for child_name, child_mod in gm.named_children():
+        if child_name in subgraph_names and isinstance(child_mod, torch.fx.GraphModule):
+            view_to_reshape(child_mod)
+
     for nd in gm.graph.find_nodes(
         op="call_function", target=torch.ops.aten.view.default
     ):
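Only the recursion is new here; the loop over aten.view nodes already existed. As a minimal standalone sketch of the full pattern (the name replace_view_with_reshape is illustrative, and the loop body that swaps the node target is an assumption about what the existing pass does, since that line falls outside this hunk):

import torch
from torch.fx import GraphModule

def replace_view_with_reshape(gm: GraphModule) -> None:
    # First recurse into child GraphModules referenced by get_attr nodes;
    # invoke_subgraph bodies are attached to the outer module this way.
    subgraph_names = {n.target for n in gm.graph.find_nodes(op="get_attr")}
    for child_name, child_mod in gm.named_children():
        if child_name in subgraph_names and isinstance(child_mod, GraphModule):
            replace_view_with_reshape(child_mod)

    # Then rewrite aten.view calls in this graph to aten.reshape.
    for node in gm.graph.find_nodes(
        op="call_function", target=torch.ops.aten.view.default
    ):
        node.target = torch.ops.aten.reshape.default
    gm.recompile()  # keep gm's generated code in sync with the mutated graph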

torch/fx/passes/fake_tensor_prop.py

Lines changed: 29 additions & 0 deletions
@@ -7,6 +7,7 @@
 from torch.fx._compatibility import compatibility
 from torch.fx.experimental.proxy_tensor import py_sym_types, snapshot_fake
 from torch.fx.node import map_aggregate
+from torch.utils._ordered_set import OrderedSet


 __all__ = ["FakeTensorProp"]
@@ -36,13 +37,41 @@ def __init__(
         self._mode = mode
         mode.epoch += 1
         mode.reset_nt_tensor_id_counter()
+        self.seen_subgraphs: OrderedSet[str] = OrderedSet()

     def run_node(self, n: Node):
         from torch.fx.experimental.symbolic_shapes import (
             compute_unbacked_bindings,
             rebind_unbacked,
         )

+        if (
+            n.op == "call_function"
+            and n.target is torch.ops.higher_order.invoke_subgraph
+            and n.args[1] not in self.seen_subgraphs
+        ):
+            # Prevent redundant fake tensor prop for invoke_subgraphs. Note that
+            # there is also fake tensor caching for the entire subgraph. That
+            # caching kicks in the next time we call `run_node` for the same
+            # subgraph, which goes through super().run_node and caches the fake
+            # tensor prop. Therefore, we end up propagating fake tensors through
+            # each subgraph twice.
+            assert isinstance(n.args[1], str)
+            assert (
+                isinstance(n.args[0], torch.fx.Node)
+                and n.args[0].op == "get_attr"
+                and isinstance(n.args[0].target, str)
+            )
+            self.seen_subgraphs.add(n.args[1])
+            operands = n.args[2:]
+            example_inputs = []
+            for operand in operands:
+                assert isinstance(operand, torch.fx.Node) and "val" in operand.meta
+                example_inputs.append(operand.meta["val"])
+            return FakeTensorProp(
+                getattr(self.module, n.args[0].target), mode=self._mode
+            ).propagate(*example_inputs)
+
         result = super().run_node(n)
         rebind_unbacked(self._mode.shape_env, n, result)
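For context, a minimal standalone sketch of how FakeTensorProp is driven (the traced function f and its input are illustrative; the FakeTensorProp/FakeTensorMode API and the node.meta["val"] annotation are what the new branch relies on). The invoke_subgraph handling above reads each operand's meta["val"] as an example input and recursively runs this same propagation over the subgraph module named by the get_attr argument.

import torch
from torch.fx import symbolic_trace
from torch.fx.passes.fake_tensor_prop import FakeTensorProp
from torch._subclasses.fake_tensor import FakeTensorMode

def f(x):
    # illustrative graph: an elementwise op followed by a shape change
    return torch.sin(x).reshape(1, 8)

gm = symbolic_trace(f)

# Propagate fake tensors (metadata only) through the graph, without
# allocating real outputs.
mode = FakeTensorMode()
FakeTensorProp(gm, mode=mode).propagate(torch.randn(8))

for node in gm.graph.nodes:
    # Each node now carries its propagated value in meta["val"]; this is the
    # metadata the invoke_subgraph branch reads from the HOP's operands.
    print(node.op, node.name, node.meta.get("val"))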
4877
