Merge branch 'pytorch:main' into minjean/xpu_nested_layernorm · pytorch/pytorch@9c429c4 · GitHub

Commit 9c429c4

Merge branch 'pytorch:main' into minjean/xpu_nested_layernorm
2 parents: 1a42c02 + d6d670a

65 files changed: +1091 −1351 lines

.ci/docker/build.sh

Lines changed: 4 additions & 0 deletions

@@ -1,4 +1,8 @@
 #!/bin/bash
+# The purpose of this script is to:
+# 1. Extract the set of parameters to be used for a docker build based on the provided image name.
+# 2. Run docker build with the parameters found in step 1.
+# 3. Run the built image and print out the expected and actual versions of packages installed.

 set -ex

.ci/docker/libtorch/build.sh

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ case ${GPU_ARCH_TYPE} in
         BASE_TARGET=rocm
         DOCKER_TAG=rocm${GPU_ARCH_VERSION}
         GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
-        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx942"
+        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
         ;;
     *)

.ci/docker/manywheel/build.sh

Lines changed: 1 addition & 1 deletion

@@ -97,7 +97,7 @@ case ${GPU_ARCH_TYPE} in
             DEVTOOLSET_VERSION="11"
             GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
         fi
-        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102"
+        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
         ;;
     xpu)

requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -16,6 +16,6 @@ psutil
 pyyaml
 requests
 setuptools
-sympy==1.13.3
+sympy>=1.13.3
 types-dataclasses
 typing-extensions>=4.10.0

setup.py

Lines changed: 1 addition & 1 deletion

@@ -1120,7 +1120,7 @@ def main():
         "filelock",
         "typing-extensions>=4.10.0",
         'setuptools ; python_version >= "3.12"',
-        "sympy==1.13.3",
+        "sympy>=1.13.3",
         "networkx",
         "jinja2",
         "fsspec",

test/distributed/tensor/test_attention.py

Lines changed: 1 addition & 11 deletions

@@ -21,7 +21,6 @@
 from torch.distributed.tensor.parallel import parallelize_module
 from torch.nn.attention import sdpa_kernel, SDPBackend
 from torch.testing._internal.common_cuda import (
-    PLATFORM_SUPPORTS_CUDNN_ATTENTION,
     PLATFORM_SUPPORTS_FLASH_ATTENTION,
     PLATFORM_SUPPORTS_FUSED_ATTENTION,
     PLATFORM_SUPPORTS_MEM_EFF_ATTENTION,
@@ -42,8 +41,6 @@
     backends.append(SDPBackend.FLASH_ATTENTION)
 if PLATFORM_SUPPORTS_MEM_EFF_ATTENTION:
     backends.append(SDPBackend.EFFICIENT_ATTENTION)
-if PLATFORM_SUPPORTS_CUDNN_ATTENTION:
-    backends.append(SDPBackend.CUDNN_ATTENTION)

 rotater_enum_to_str = {
     _RotateMethod.ALL_GATHER: "allgather",
@@ -89,10 +86,6 @@ def _test_ring_attention_sdpa(
         rotater: _RotateMethod,
         test_forward_only: bool,
     ) -> None:
-        # TODO: DTensor does not support backward on SDPBackend.CUDNN_ATTENTION so far
-        if not test_forward_only and backend == SDPBackend.CUDNN_ATTENTION:
-            return
-
         def fn_eval(fn, *args, **kwargs):
             if test_forward_only:
                 with torch.no_grad():
@@ -116,10 +109,7 @@ def fn_eval(fn, *args, **kwargs):
         nheads = 8
         torch.manual_seed(10)
         dtype = (
-            torch.bfloat16
-            if backend == SDPBackend.FLASH_ATTENTION
-            or backend == SDPBackend.CUDNN_ATTENTION
-            else torch.float32
+            torch.bfloat16 if backend == SDPBackend.FLASH_ATTENTION else torch.float32
         )

         _cp_options.enable_load_balance = load_balance
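
The hunks above remove SDPBackend.CUDNN_ATTENTION from the backends exercised by the ring-attention test, leaving flash and memory-efficient attention. As a rough sketch of how such a backend list is typically consumed, the snippet below gates backends on the same platform flags the test imports and pins SDPA to one backend at a time; it is not code from the test file and assumes a CUDA device is available.

# Sketch only: build a backend list from the platform flags, then pin
# scaled_dot_product_attention to a single backend at a time.
import torch
import torch.nn.functional as F
from torch.nn.attention import sdpa_kernel, SDPBackend
from torch.testing._internal.common_cuda import (
    PLATFORM_SUPPORTS_FLASH_ATTENTION,
    PLATFORM_SUPPORTS_MEM_EFF_ATTENTION,
)

backends = []
if PLATFORM_SUPPORTS_FLASH_ATTENTION:
    backends.append(SDPBackend.FLASH_ATTENTION)
if PLATFORM_SUPPORTS_MEM_EFF_ATTENTION:
    backends.append(SDPBackend.EFFICIENT_ATTENTION)

if torch.cuda.is_available():
    # (batch, heads, seq, head_dim); bf16 keeps both backends eligible
    q = k = v = torch.randn(2, 8, 16, 64, device="cuda", dtype=torch.bfloat16)
    for backend in backends:
        with sdpa_kernel(backend):  # restrict SDPA to one backend, as the test does
            out = F.scaled_dot_product_attention(q, k, v)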

test/dynamo/test_decorators.py

Lines changed: 151 additions & 12 deletions

@@ -204,6 +204,36 @@ def fn(a):
         self.assertEqual(cnts.frame_count, 1)
         self.assertEqual(cnts.op_count, 5)

+    def test_allow_in_graph_no_id_reuse(self):
+        cnts = torch._dynamo.testing.CompileCounter()
+
+        def do_allow_in_graph(x):
+            return x + 1
+
+        torch._dynamo.allow_in_graph(do_allow_in_graph)
+        del do_allow_in_graph
+
+        # `id(dont_allow_in_graph)` would likely match `id(do_allow_in_graph)`
+        # We want to make sure Dynamo always trace through
+        # `dont_allow_in_graph`, by checking for the explicit graph break.
+        def dont_allow_in_graph(x):
+            torch._dynamo.graph_break()
+            return x + 1
+
+        @torch.compile(backend=cnts)
+        def fn(a):
+            x = torch.add(a, 1)
+            x = torch.add(x, 1)
+            x = dont_allow_in_graph(x)
+            x = torch.add(x, 1)
+            x = torch.add(x, 1)
+            return x
+
+        fn(torch.randn(10))
+
+        # Check for graph break
+        self.assertEqual(cnts.frame_count, 3)
+
     def test_incorrect_usage_disallow_in_graph(self):
         with self.assertRaises(IncorrectUsage):
@@ -441,6 +471,49 @@ def fn(x, y):
         res = opt_fn(x, y)
         self.assertEqual(ref, res)

+    def test_nonstrict_trace_pre_existing_register_constant_type_guard(self):
+        class State:
+            def __init__(self, n):
+                self.n = n
+
+            def get_num(self):
+                torch._dynamo.graph_break()
+                return self.n
+
+            def __eq__(self, other):
+                return isinstance(other, State) and self.n == other.n
+
+            def __hash__(self):
+                return hash(self.n)
+
+        # Assume `State` is implemented in C, and the author didn't bother to
+        # provide a pytree decomposition for it, and its instances are safe to
+        # treat as a constant by `torch.compile`.
+        torch.utils._pytree.register_constant(State)
+
+        @torch._dynamo.nonstrict_trace
+        def trace_me(x, s):
+            return x * s.get_num()
+
+        cnts = torch._dynamo.testing.CompileCounterWithBackend("aot_eager")
+
+        @torch.compile(fullgraph=True, backend=cnts)
+        def fn(x, s):
+            res = trace_me(x, s)
+            return res
+
+        x = torch.ones(10)
+        # Make sure recompilation didn't happen.
+        self.assertEqual(cnts.frame_count, 0)
+        fn(x, State(42))
+        self.assertEqual(cnts.frame_count, 1)
+        fn(x, State(42))
+        self.assertEqual(cnts.frame_count, 1)
+
+        # Make sure recompilation did happen.
+        fn(x, State(41))
+        self.assertEqual(cnts.frame_count, 2)
+
     def test_nonstrict_trace_tuple_and_sym_int_output(self):
         @torch._dynamo.nonstrict_trace
         def trace_me(x):
@@ -602,6 +675,7 @@ def fn(p):
         except torch._dynamo.exc.Unsupported as e:
             msg = """
 For `nonstrict_trace`-ed function, the only allowed input types are basic types (e.g., torch.Tensor, int, float) or pytree containers of those. Here you are calling the function with arguments that contain a value of type <DecoratorTests.test_nonstrict_trace_custom_class_error.<locals>.Point>, please use one of the following to register the type with pytree:
+  * `torch.utils._pytree.register_constant`
   * `torch.utils._pytree.register_dataclass`
   * `torch.utils._pytree.register_pytree_node`
 """  # NOQA: B950
@@ -653,39 +727,104 @@ def fn(x, y):
         except torch._dynamo.exc.Unsupported as e:
             msg = """
 For `nonstrict_trace`-ed function, the only allowed input types are basic types (e.g., torch.Tensor, int, float) or pytree containers of those. Here you are calling the function with arguments that contain a value of type <DecoratorTests.test_nonstrict_trace_nested_custom_class_error.<locals>.Point>, please use one of the following to register the type with pytree:
+  * `torch.utils._pytree.register_constant`
   * `torch.utils._pytree.register_dataclass`
   * `torch.utils._pytree.register_pytree_node`
 """  # NOQA: B950
             self.assertIn(msg, str(e))

-    def test_nonstrict_trace_pytree_register_constant_error(self):
+    def test_nonstrict_newly_constructed_trace_register_constant_type_error(self):
+        class State:
+            def __init__(self, n):
+                self.n = n
+
+            def get_num(self):
+                torch._dynamo.graph_break()
+                return self.n
+
+            def __eq__(self, other):
+                return isinstance(other, State) and self.n == other.n
+
+            def __hash__(self):
+                return hash(self.n)
+
+        # Assume `State` is implemented in C, and the author didn't bother to
+        # provide a pytree decomposition for it, and its instances are safe to
+        # treat as a constant by `torch.compile`.
+        torch.utils._pytree.register_constant(State)
+
+        @torch._dynamo.nonstrict_trace
+        def trace_me(x, s):
+            return x * s.get_num()
+
+        @torch.compile(fullgraph=True, backend="aot_eager")
+        def fn(x):
+            s = State(10)
+            res = trace_me(x, s)
+            return res
+
+        try:
+            x = torch.ones(10)
+            fn(x)
+            self.assertFalse(True)  # must raise error before this
+        except torch._dynamo.exc.Unsupported as e:
+            msg = """
+You are calling a `nonstrict_trace`-ed function with an input that contains an object of type <DecoratorTests.test_nonstrict_newly_constructed_trace_register_constant_type_error.<locals>.State>, which was marked with `pytree.register_constant`. However, the object was constructed _inside_ the `torch.compile` region.
+
+Please construct the object _outside_ the `torch.compile` region, or submit an issue to GitHub.
+"""  # NOQA: B950
+            self.assertIn(msg, str(e))
+
+    def test_nonstrict_trace_object_in_context_error(self):
         class Point:
-            x: int
-            y: int
+            x: torch.Tensor
+            y: torch.Tensor

             def __init__(self, x, y):
                 self.x = x
                 self.y = y

-        torch.utils._pytree.register_constant(Point)
+        class PointTensor:
+            p: Point
+            t: torch.Tensor
+
+            def __init__(self, p, t):
+                self.p = p
+                self.t = t
+
+        torch.utils._pytree.register_pytree_node(
+            PointTensor,
+            lambda pt: ((pt.t,), pt.p),
+            lambda ts, p: PointTensor(p, ts[0]),
+        )

         @torch._dynamo.nonstrict_trace
-        def trace_me(x, p):
+        def trace_me(pt):
             torch._dynamo.graph_break()
-            return x * p.x + p.y
+            return pt.t + pt.p.x * pt.p.y

         @torch.compile(fullgraph=True, backend="aot_eager")
-        def fn(x, p):
-            res = trace_me(x, p)
-            return res + 1
+        def fn(x, y):
+            p = Point(x, y)
+            t = x + y
+            pt = PointTensor(p, t)
+            res = trace_me(pt)
+            return res

         try:
-            p = Point(3, 4)
-            fn(torch.ones(10), p)
+            x, y = torch.ones(10), torch.ones(1)
+            fn(x, y)
             self.assertFalse(True)  # must raise error before this
         except torch._dynamo.exc.Unsupported as e:
             msg = """
-This error is most likely due to a call to `nonstrict_trace`-ed function, where one of the argument contains object of a type that has been (or needs to be) `torch.utils._pytree.register_constant`-ed. We currently don't support that.
+You are calling a `nonstrict_trace`-ed function where one one of the inputs has been registered with a `pytree_flatten` that puts an object of type <DecoratorTests.test_nonstrict_trace_object_in_context_error.<locals>.Point> into the context.
+
+Please consider modifying that `pytree_flatten` to avoid putting the object into context, and apply one of the following to <DecoratorTests.test_nonstrict_trace_object_in_context_error.<locals>.Point>
+  * `torch.utils._pytree.register_constant`
+  * `torch.utils._pytree.register_dataclass`
+  * `torch.utils._pytree.register_pytree_node`
+
+If the above doesn't work, please subtmit an issue to GitHub.
 """  # NOQA: B950
             self.assertIn(msg, str(e))
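
Taken together, the new tests and error messages describe a usage contract for `nonstrict_trace`: a type registered with `torch.utils._pytree.register_constant` must be hashable and comparable, and its instances must be constructed outside the compiled region; tensor-carrying containers are better served by `register_dataclass` or `register_pytree_node`. The following is a condensed, illustrative sketch of the supported pattern; `Config` is a made-up class, not something from the test file.

# Illustrative sketch distilled from the tests above; not part of the commit.
import torch

class Config:
    def __init__(self, scale):
        self.scale = scale

    def __eq__(self, other):
        return isinstance(other, Config) and self.scale == other.scale

    def __hash__(self):
        return hash(self.scale)

# Treat Config instances as opaque constants; equality is what gets guarded,
# so calling with a different Config triggers a recompile (per the type-guard test).
torch.utils._pytree.register_constant(Config)

@torch._dynamo.nonstrict_trace
def scale_by_config(x, cfg):
    return x * cfg.scale

@torch.compile(fullgraph=True, backend="aot_eager")
def fn(x, cfg):
    return scale_by_config(x, cfg)

cfg = Config(2.0)             # constructed *outside* the compiled region
out = fn(torch.ones(4), cfg)  # as the new error message requires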

test/dynamo/test_flat_apply.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ def distance(a, b, norm):
     return (a.x - b.x).abs() + (a.y - b.y).abs()


-@dataclass
+@dataclass(frozen=True)
 class Norm:
     typ: str
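
The only change here adds frozen=True to the Norm dataclass. A frozen dataclass (with the default eq=True) gets an auto-generated __hash__ and rejects field mutation, which is presumably what lets Norm instances act as hashable, constant-like data in this test. A standalone illustration of that Python behavior, with hypothetical class names not taken from the test file:

# Plain-Python illustration of why frozen=True matters; names are made up.
from dataclasses import dataclass

@dataclass
class MutableNorm:
    typ: str

@dataclass(frozen=True)
class FrozenNorm:
    typ: str

print(hash(FrozenNorm("l1")))    # works: frozen + eq=True generates __hash__
try:
    hash(MutableNorm("l1"))      # eq=True without frozen sets __hash__ = None
except TypeError as e:
    print(e)                     # "unhashable type: 'MutableNorm'"

try:
    FrozenNorm("l1").typ = "l2"  # frozen instances reject attribute assignment
except Exception as e:
    print(type(e).__name__)      # FrozenInstanceError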

test/dynamo/test_repros.py

Lines changed: 13 additions & 0 deletions

@@ -4893,6 +4893,19 @@ def fn(x_weak, weight, y):
         self.assertEqual(ref, res)
         self.assertEqual(cnt.frame_count, 2)

+    def test_return_weakref(self):
+        def f(t):
+            t = t * 2
+            wr = weakref.ref(t)
+            return wr, t
+
+        ref_t = torch.randn(2, 2, requires_grad=True)
+        ref_y = f(ref_t)
+
+        t = ref_t.detach().clone().requires_grad_()
+        y = torch.compile(f, backend="eager", fullgraph=True)(t)
+        self.assertEqual(ref_y[0](), y[0]())
+
     def test_weakref_del(self):
         def fn(x_weak, y):
             x = x_weak()
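
The new test returns a weakref created inside the compiled function and checks that it dereferences to the same tensor as in eager mode. As a plain-Python reminder of the weakref semantics involved, independent of torch.compile:

# Plain-Python refresher; not part of the diff. The final assertion relies on
# CPython's immediate refcount-based collection.
import weakref
import torch

t = torch.randn(2, 2)
wr = weakref.ref(t)
assert wr() is t     # while a strong reference exists, the weakref resolves to it

del t                # drop the last strong reference...
assert wr() is None  # ...and the weakref now dereferences to None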
