[ca][dynamo] always run eager checkpoint region's recomputation in eager · pytorch/pytorch@eeeafba

Commit eeeafba

[ca][dynamo] always run eager checkpoint region's recomputation in eager
ghstack-source-id: 11023ed
Pull Request resolved: #153300
1 parent eb4eada commit eeeafba

File tree

3 files changed: +28, -19 lines changed

test/dynamo_expected_failures/TestAutograd.test_access_saved_tensor_twice_without_recomputation_works

Whitespace-only changes.

test/inductor/test_compiled_autograd.py

Lines changed: 11 additions & 19 deletions
@@ -4206,10 +4206,13 @@ def wrap_test_class(orig_cls):
         ):
             dct[name] = unittest.expectedFailure
         elif name.startswith("test_"):
+            backend = lookup_backend(name)
+            if not HAS_CUDA and backend == "inductor":
+                continue
             ctxs = [
                 compiled_autograd._enable(
                     make_compiler_fn(
-                        backend=lookup_backend(name),
+                        backend=backend,
                         fullgraph=name not in known_graph_breaks_tests,
                     )
                 ),
@@ -4302,6 +4305,8 @@ def wrap_test_class(orig_cls):
     "test_full_backward_hook_double_backward",  # _pack_with_none
     "test_grad_mode_restored_reentrant",  # assertTrue
     "test_multi_grad_any_hooks",  # register_multi_grad_hook
+    "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks
+    "test_graph_save_on_cpu",  # dynamo disabled
 }
 
 test_contexts = {
@@ -4351,37 +4356,27 @@ def wrap_test_class(orig_cls):
         "test_custom_autograd_no_early_free",  # batched gradients
         "test_lobpcg",  # NaNs
         # Uncategorized
+        "test_autograd_simple_views_python",  # gradient is None
+        "test_function_returns_undefined_tensor",  # gradient is None
+        "test_input_buffer_accum",  # add(sparse, dense) not supported
     },
     "eager": {  # will be run without torch.compiling the CA graph
         "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
         "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
         "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
         "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
-        "test_autograd_simple_views_python",  # gradient is None
-        "test_function_returns_undefined_tensor",  # gradient is None
         "test_naughty_autograd_function_stashing_ctx",  # bytecode issue
         "test_unrelated_inputs",  # gradient batching rule not implemented for aten::sym_size.int
         "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
         "test_return_duplicate",  # gradient batching rule not implemented for aten::sym_size.int
         "test_return_duplicate_inplace",  # gradient batching rule not implemented for aten::sym_size.int
         "test_setitem",  # CopySlices accuracy error
-        "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
-        "test_checkpoint_detects_non_determinism",  # different error
-        "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
-        "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
         "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
-        "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
-        "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
-        "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
-        "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
-        "test_checkpointing",  # takes very very long
-        "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
-        "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
-        "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
         "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
         "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
         "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
         "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
+        "test_graph_save_on_cpu",  # PGO strides check out of bounds
     },
     "aot_eager": {  # will be run with torch.compile(backend="eager")
         # Category: FakeTensor
@@ -4390,10 +4385,7 @@ def wrap_test_class(orig_cls):
         "test_grad_batched_grad",  # torch._subclasses.fake_tensor.UnsupportedFakeTensorException: meta converter nyi
         "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
     },
-    "inductor": {  # will be run with torch.compile(backend="aot_eager")
-        "test_input_buffer_accum",  # does not support sparse_grad=True: https://github.com/pytorch/pytorch/issues/120267
-        "test_graph_save_on_cpu",  # does not support pin_memory: https://github.com/pytorch/pytorch/issues/134173
-    },
+    "inductor": {},  # will be run with torch.compile(backend="aot_eager")
     # tests not present in this dict will be run with torch.compile(backend="inductor")
 }
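As a reading aid, here is a minimal, hypothetical standalone sketch of the selection logic this hunk changes: the wrapper now resolves each test's backend once via lookup_backend and skips inductor-backed tests on machines without CUDA. The dict and its name below are trimmed stand-ins for the real per-backend table in test_compiled_autograd.py, not the actual harness.

import torch

HAS_CUDA = torch.cuda.is_available()

# Trimmed stand-in for the per-backend test table (hypothetical name/content).
tests_by_backend = {
    "eager": {"test_setitem"},
    "aot_eager": {"test_grad_batched_grad"},
    "inductor": set(),
}

def lookup_backend(test_name: str) -> str:
    # Mirrors the lookup in the diff: tests not listed anywhere default to inductor.
    for backend, tests in tests_by_backend.items():
        if test_name in tests:
            return backend
    return "inductor"

for name in ("test_setitem", "test_checkpointing"):
    backend = lookup_backend(name)
    if not HAS_CUDA and backend == "inductor":
        # New behavior: inductor-backed tests are skipped entirely on CPU-only
        # machines instead of being wrapped and failing at runtime.
        print(f"skipping {name} (inductor backend needs CUDA here)")
        continue
    print(f"{name} -> torch.compile backend: {backend}")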

torch/utils/checkpoint.py

Lines changed: 17 additions & 0 deletions
@@ -328,6 +328,7 @@ def backward(ctx, *args):
 def noop_context_fn():
     return contextlib.nullcontext(), contextlib.nullcontext()
 
+# Note: [torch.compile and checkpoint]
 # TorchDynamo does not step inside utils.checkpoint function. The flow
 # looks likes this
 #  1) TorchDynamo tries to wrap utils.checkpoint in a HigherOrderOp by
@@ -1106,6 +1107,8 @@ def pack_hook(x):
                     frame.x_metadatas.append(frame.metadata_fn(x))
             return holder
 
+        # See Note: [compiled autograd and checkpoint unpack hook]
+        @torch._disable_dynamo
         def unpack_hook(holder):
             gid = torch._C._current_graph_task_id()
             if gid == -1:
@@ -1541,3 +1544,17 @@ def recompute_fn(*inputs):
             )
 
     return
+
+# Note: [compiled autograd and checkpoint unpack hook]
+# When tracing via compiled autograd, this hook will be visible to the
+# compiler if the forward of this checkpointed region ran in eager.
+# If the forward had run under compile, it would have been wrapped in a
+# higher order op. See Note: [torch.compile and checkpoint].
+#
+# Since we run the recomputation hook under an enable_grad context,
+# AOTDispatch will trace a joint graph for this hook, and may
+# save different activations than in eager. This conflicts with the
+# strict activation count checks in `frame.check_recomputed_tensors_match`.
+# So, we disable this hook to force it to recompute eager checkpointed regions
+# in eager. This could be removed if we can disable the partitioner for this
+# graph segment.
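To make the note concrete, here is a hedged usage sketch of the scenario it describes, assuming the private torch._dynamo.compiled_autograd._enable API that the test harness above uses (not a public API): the checkpointed forward runs in eager, and during backward under compiled autograd the @torch._disable_dynamo-wrapped unpack hook keeps the recomputation in eager rather than letting AOTDispatch trace and re-partition it.

import torch
from torch._dynamo import compiled_autograd
from torch.utils.checkpoint import checkpoint

def region(x):
    # Activations of this block are dropped and recomputed during backward.
    return torch.sin(x) * torch.cos(x)

def compiler_fn(gm):
    # Compile the compiled-autograd graph with the eager backend for illustration.
    return torch.compile(gm, backend="eager")

x = torch.randn(8, requires_grad=True)

# The forward runs in eager, so the checkpoint is not wrapped in a higher order
# op and its unpack hook stays visible to compiled autograd (see the note above).
loss = checkpoint(region, x, use_reentrant=False).sum()

with compiled_autograd._enable(compiler_fn):
    # Because unpack_hook is decorated with @torch._disable_dynamo, the
    # recomputation of `region` runs in eager, so the activation counts match
    # what frame.check_recomputed_tensors_match expects.
    loss.backward()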

0 commit comments