Update base for Update on "[Cutlass] Fix wrapper code generation breakage" · pytorch/pytorch@03e9fb1 · GitHub


Commit 03e9fb1

Update base for Update on "[Cutlass] Fix wrapper code generation breakage"
Fixes issues introduced by #159355. The issue got past OSS CI because the H100 tag wasn't added; it's unclear how to prevent this kind of breakage in the future. Perhaps we should run the H100 jobs on Inductor PRs?

cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx ipiszy chenyang78 kadeng muchulee8 amjames chauhang aakhundov coconutruben

[ghstack-poisoned]
2 parents d35b27d + 50eac81 commit 03e9fb1

File tree

19 files changed: 258 additions, 130 deletions

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-11ec6354315768a85da41032535e3b7b99c5f706
+f7888497a1eb9e98d4c07537f0d0bcfe180d1363

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-29ae4c76c026185f417a25e841d2cd5e65f087a3
+b6a5b82b9948b610fa4c304d0d869c82b8f17db1

.github/workflows/inductor-periodic.yml

Lines changed: 15 additions & 15 deletions

@@ -81,21 +81,21 @@ jobs:
       sync-tag: rocm-build
       test-matrix: |
         { include: [
-          { config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
-          { config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
+          { config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
+          { config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
         ]}
     secrets: inherit

aten/src/ATen/native/ComparisonUtils.cpp

Lines changed: 23 additions & 0 deletions

@@ -24,6 +24,29 @@ static void _assert_match(const O& original, const C& compared, const std::strin
   }
 }
 
+template<>
+void _assert_match<c10::Device, std::optional<c10::Device>>(
+    const c10::Device& original,
+    const std::optional<c10::Device>& compared,
+    const std::string& name) {
+  if (compared) {
+    const c10::Device& expected = compared.value();
+    if (original.type() != expected.type()) {
+      std::stringstream msg;
+      msg << "Tensor " << name << " mismatch! Expected: " << expected << ", Got: " << original;
+      throw std::runtime_error(msg.str());
+    }
+
+    // If the expected device doesn't have an index (e.g., just "cuda"),
+    // or if both devices have the same index, consider them equal
+    if (expected.has_index() && original.has_index() && expected.index() != original.index()) {
+      std::stringstream msg;
+      msg << "Tensor " << name << " mismatch! Expected: " << expected << ", Got: " << original;
+      throw std::runtime_error(msg.str());
+    }
+  }
+}
+
 void _assert_tensor_metadata_meta_symint(at::Tensor const& tensor, at::OptionalSymIntArrayRef sizes, at::OptionalSymIntArrayRef strides, std::optional<c10::ScalarType> dtype, std::optional<c10::Device> device, std::optional<c10::Layout> layout) {
   _assert_match(tensor.sym_sizes(), sizes, "sizes");
   _assert_match(tensor.sym_strides(), strides, "strides");

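The specialization added above relaxes the device check used by aten::_assert_tensor_metadata: device types must match, but an expected device without an index (a plain "cuda") now accepts a tensor that actually lives on "cuda:0". The following is a minimal Python sketch of that matching rule for illustration only; the devices_match helper is hypothetical and not part of the change.

import torch

def devices_match(original: torch.device, expected: torch.device) -> bool:
    # Mirror of the C++ logic above: types must agree, and indices only
    # matter when both devices actually carry one.
    if original.type != expected.type:
        return False
    if expected.index is not None and original.index is not None:
        return original.index == expected.index
    return True

# An index-less "cuda" accepts a tensor placed on "cuda:0" ...
assert devices_match(torch.device("cuda", 0), torch.device("cuda"))
# ... while explicit, differing indices still mismatch.
assert not devices_match(torch.device("cuda", 0), torch.device("cuda", 1))
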
test/dynamo/test_guard_manager.py

Lines changed: 11 additions & 3 deletions

@@ -931,7 +931,7 @@ def hook(guard_wrapper, f_locals, builder):
 
             # Check types of foo.x
             foo_x_mgr = builder.get_guard_manager_from_source(foo_x_source)
-            self.assertTrue(foo_x_mgr.is_guarded_value_dict())
+            self.assertTrue(issubclass(foo_x_mgr.get_type_of_guarded_value(), dict))
 
             # Check types of foo.x["a"]
             foo_x_a_source = DictGetItemSource(foo_x_source, "a")
@@ -946,12 +946,14 @@ def hook(guard_wrapper, f_locals, builder):
             # Check types of foo.z
             foo_z_source = AttrSource(foo_source, "z")
             foo_z_mgr = builder.get_guard_manager_from_source(foo_z_source)
-            self.assertTrue(foo_z_mgr.is_guarded_value_empty_dict())
+            self.assertTrue(issubclass(foo_z_mgr.get_type_of_guarded_value(), dict))
 
             # Check types of mod
             mod_source = LocalSource("mod")
             mod_mgr = builder.get_guard_manager_from_source(mod_source)
-            self.assertTrue(mod_mgr.is_guarded_value_nn_module())
+            self.assertTrue(
+                issubclass(mod_mgr.get_type_of_guarded_value(), torch.nn.Module)
+            )
 
         opt_fn = torch.compile(fn, backend="eager", fullgraph=True)
         with install_guard_manager_testing_hook(hook):
@@ -1006,6 +1008,12 @@ def hook(guard_wrapper, f_locals, builder):
             from torch._dynamo.source import AttrSource, LocalSource
 
             foo_source = LocalSource("foo")
+            foo_mgr = builder.get_guard_manager_from_source(foo_source)
+            for accessor in foo_mgr.get_accessors():
+                if isinstance(accessor, GetAttrGuardAccessor):
+                    self.assertTrue(
+                        accessor.get_attr_name() in ("a", "b", "c", "d", "e")
+                    )
 
             # Check types of foo.a
             foo_a_source = AttrSource(foo_source, "a")

test/export/test_export.py

Lines changed: 17 additions & 0 deletions

@@ -59,6 +59,7 @@
     OutputSpec,
     TensorArgument,
 )
+from torch.export.passes import move_to_device_pass
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch.fx.experimental.symbolic_shapes import ShapeEnv
 from torch.testing import FileCheck
@@ -15914,6 +15915,22 @@ def forward(self, x):
             len(list(new_ep.graph.nodes)[-1].args[0]), len(signature.output_specs)
         )
 
+    @requires_cuda
+    def test_assert_tensor_metadata_device_index(self):
+        class N(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                x = x.float()
+                y = y.float()
+                return x + y
+
+        inp = (torch.randn(3, device="cuda"), torch.randn(3, device="cuda"))
+        ep = export(N(), inp)
+        ep = move_to_device_pass(ep, {"cuda:0": "cuda"})
+        ep.module()(torch.randn(3, device="cuda:0"), torch.randn(3, device="cuda:0"))
+
     def test_input_output_no_stacktrace(self):
         class M(torch.nn.Module):
             def forward(self, x):

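The new test pairs the relaxed device assertion with torch.export.passes.move_to_device_pass: a program exported against index-less "cuda" inputs can later be fed tensors on "cuda:0". A standalone sketch of that flow, assuming a CUDA-enabled build, is shown below; it simply restates the test above outside the test harness.

import torch
from torch.export import export
from torch.export.passes import move_to_device_pass

class Add(torch.nn.Module):
    def forward(self, x, y):
        return x.float() + y.float()

inp = (torch.randn(3, device="cuda"), torch.randn(3, device="cuda"))
ep = export(Add(), inp)
# Map the recorded "cuda:0" back to an index-less "cuda" so the exported
# program's _assert_tensor_metadata check accepts any CUDA device index.
ep = move_to_device_pass(ep, {"cuda:0": "cuda"})
out = ep.module()(torch.randn(3, device="cuda:0"), torch.randn(3, device="cuda:0"))
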
test/inductor/test_compiled_autograd.py

Lines changed: 21 additions & 6 deletions

@@ -29,6 +29,7 @@
 from torch._dynamo.testing import normalize_gm
 from torch._dynamo.utils import counters
 from torch._inductor import config as inductor_config
+from torch._inductor.cpp_builder import is_msvc_cl
 from torch._inductor.test_case import run_tests, TestCase
 from torch.nn.attention.flex_attention import flex_attention
 from torch.nn.parallel import DistributedDataParallel as DDP
@@ -40,6 +41,7 @@
 from torch.testing._internal.common_utils import (
     instantiate_parametrized_tests,
     IS_S390X,
+    IS_WINDOWS,
     parametrize,
     scoped_load_inline,
     skipIfWindows,
@@ -193,6 +195,18 @@ def model(i):
         for _ in range(3):
             self.run_as_subprocess(script)
 
+    def gen_cache_miss_log_prefix(self):
+        if IS_WINDOWS:
+            if is_msvc_cl():
+                return "Cache miss due to new autograd node: struct "
+            else:
+                self.fail(
+                    "Compilers other than msvc have not yet been verified on Windows."
+                )
+            return ""
+        else:
+            return "Cache miss due to new autograd node: "
+
     def test_reset(self):
         compiled_autograd.compiled_autograd_enabled = True
         torch._C._dynamo.compiled_autograd.set_autograd_compiler(lambda: None, True)
@@ -3146,7 +3160,7 @@ def test_logs(self):
         self.assertEqual(counters["compiled_autograd"]["compiles"], 1)
         assert "torch::autograd::AccumulateGrad (NodeCall" in logs.getvalue()
         assert (
-            "Cache miss due to new autograd node: torch::autograd::GraphRoot"
+            self.gen_cache_miss_log_prefix() + "torch::autograd::GraphRoot"
             not in logs.getvalue()
         )
 
@@ -3353,7 +3367,6 @@ def fn(x, obj):
             sum(1 for e in expected_logs if e in logs.getvalue()), len(expected_logs)
         )
 
-    @skipIfWindows(msg="AssertionError: Scalars are not equal!")
     def test_verbose_logs_cpp(self):
         torch._logging.set_logs(compiled_autograd_verbose=True)
 
@@ -3381,8 +3394,9 @@ def fn():
         self.check_output_and_recompiles(fn)
 
         patterns1 = [
-            r".*Cache miss due to new autograd node: torch::autograd::GraphRoot \(NodeCall 0\) with key size (\d+), "
-            r"previous key sizes=\[\]\n",
+            r".*"
+            + self.gen_cache_miss_log_prefix()
+            + r"torch::autograd::GraphRoot \(NodeCall 0\) with key size (\d+), previous key sizes=\[\]\n",
         ]
 
         all_logs = logs.getvalue()
@@ -3420,7 +3434,8 @@ def test_verbose_logs_dynamic_shapes(self):
 
         actual_logs = logs.getvalue()
         expected_logs = [
-            "Cache miss due to new autograd node: torch::autograd::GraphRoot (NodeCall 0) with key size 39, previous key sizes=[]",
+            self.gen_cache_miss_log_prefix()
+            + "torch::autograd::GraphRoot (NodeCall 0) with key size 39, previous key sizes=[]",
         ]
         for expected in expected_logs:
             self.assertTrue(expected in actual_logs)
@@ -3451,7 +3466,7 @@ def fn():
         fn()
 
         unexpected_logs = [
-            "Cache miss due to new autograd node: torch::autograd::GraphRoot (NodeCall 0)"
+            self.gen_cache_miss_log_prefix() + "torch::autograd::GraphRoot (NodeCall 0)"
         ]
 
         self.assertEqual(sum(1 for e in unexpected_logs if e in logs.getvalue()), 0)

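The gen_cache_miss_log_prefix helper exists because MSVC builds print C++ class names with a leading "struct ", so the cache-miss log line differs between Windows/MSVC and other platforms. A brief hedged illustration of how the expected string is assembled; the msvc flag below is illustrative, whereas the real code calls is_msvc_cl().

def cache_miss_prefix(msvc: bool = False) -> str:
    # On MSVC the logged node name reads "struct torch::autograd::GraphRoot",
    # so the expected prefix gains a trailing "struct ".
    base = "Cache miss due to new autograd node: "
    return base + "struct " if msvc else base

expected = cache_miss_prefix() + "torch::autograd::GraphRoot (NodeCall 0)"
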
test/run_test.py

Lines changed: 0 additions & 2 deletions

@@ -182,7 +182,6 @@ def __contains__(self, item):
     "dynamo/test_misc",
     "inductor/test_cpu_repro",
     "inductor/test_cpu_select_algorithm",
-    "inductor/test_aot_inductor_arrayref",
     "inductor/test_torchinductor_codegen_dynamic_shapes",
     "lazy/test_meta_kernel",
     "onnx/test_utility_funs",
@@ -240,7 +239,6 @@ def __contains__(self, item):
     # some false errors
     "doctests",
     # new failures to investigate and fix
-    "cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic",
     "test_tensorboard",
     # onnx + protobuf failure, see
     # https://github.com/protocolbuffers/protobuf/issues/22104

torch/_C/_dynamo/guards.pyi

Lines changed: 3 additions & 0 deletions

@@ -142,6 +142,9 @@ class GetGenericDictGuardAccessor(GuardAccessor): ...
 class TypeDictGuardAccessor(GuardAccessor): ...
 class TypeMROGuardAccessor(GuardAccessor): ...
 
+class GetAttrGuardAccessor(GuardAccessor):
+    def get_attr_name(self) -> str: ...
+
 def install_object_aliasing_guard(
     guard_managers: list[GuardManager],
     tensor_names: list[str],

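The new stub exposes the attribute name behind a GetAttrGuardAccessor, which the updated test_guard_manager.py test uses to check which attributes of foo are guarded. Below is a small hedged sketch of combining it with get_accessors(); the attr_names helper is hypothetical and not part of the change.

from torch._C._dynamo.guards import GetAttrGuardAccessor

def attr_names(guard_manager):
    # Collect the attribute names guarded through GetAttr accessors on a
    # GuardManager (e.g. one obtained from a builder in the testing hook).
    return [
        accessor.get_attr_name()
        for accessor in guard_manager.get_accessors()
        if isinstance(accessor, GetAttrGuardAccessor)
    ]
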
torch/_dynamo/guards.py

Lines changed: 6 additions & 6 deletions

@@ -355,7 +355,7 @@ def find_tag_safe_roots(self):
         def visit_dict_manager(node):
             # Just recurse through the key and value dict managers and check if
             # all of them are tag safe nodes.
-            assert node.is_guarded_value_dict()
+            assert issubclass(node.get_type_of_guarded_value(), dict)
 
             tag_safe_roots = []
             is_subtree_tag_safe = True
@@ -394,12 +394,12 @@ def visit_manager(node):
             # If the node guards a tensor, mark it tag safe only if there
             # are no accessors. Presence of accessors means presence of
             # symbolic shape guards.
-            if node.is_guarded_value_tensor():
+            if issubclass(node.get_type_of_guarded_value(), torch.Tensor):
                 if node.has_no_accessors() and not node.has_object_aliasing_guard():
                     node.mark_tag_safe()
                 else:
                     node.mark_tag_safe()
-            elif node.is_guarded_value_dict():
+            elif issubclass(node.get_type_of_guarded_value(), dict):
                 accessors = node.get_accessors()
                 child_mgrs = node.get_child_managers()
                 is_subtree_tag_safe = all(
@@ -408,7 +408,7 @@ def visit_manager(node):
                 )
                 if is_subtree_tag_safe:
                     node.mark_tag_safe()
-            elif node.is_guarded_value_nn_module():
+            elif issubclass(node.get_type_of_guarded_value(), torch.nn.Module):
                 accessors = node.get_accessors()
                 child_mgrs = node.get_child_managers()
                 is_subtree_tag_safe = all(
@@ -434,7 +434,7 @@ def visit(node):
 
         tag_safe_roots = visit(self.root)
         for node in tag_safe_roots:
-            if node.is_guarded_value_nn_module():
+            if issubclass(node.get_type_of_guarded_value(), torch.nn.Module):
                 node.mark_tag_safe_root()
 
     def populate_diff_guard_manager(self):
@@ -468,7 +468,7 @@ def get_manager_line(self, guard_manager, accessor_str=None):
         s = t + ": source=" + source
         if accessor_str:
             s += ", " + accessor_str
-        s += f", type={guard_manager.type_of_guarded_value()}"
+        s += f", type={guard_manager.get_type_of_guarded_value()}"
         s += f", tag_safe=({guard_manager.is_tag_safe()}, {guard_manager.is_tag_safe_root()})"
         return s

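Throughout guards.py the per-type predicates (is_guarded_value_dict, is_guarded_value_tensor, is_guarded_value_nn_module) are replaced by subclass checks against get_type_of_guarded_value(). A hedged sketch of the pattern follows; the guards_value_of_type helper is illustrative only and not part of the patch.

import torch

def guards_value_of_type(manager, cls) -> bool:
    # get_type_of_guarded_value() returns the Python type of the value the
    # manager guards, so one subclass check covers all the old predicates.
    return issubclass(manager.get_type_of_guarded_value(), cls)

# e.g. node.is_guarded_value_dict()      -> guards_value_of_type(node, dict)
#      node.is_guarded_value_nn_module() -> guards_value_of_type(node, torch.nn.Module)
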
0 commit comments