[ca][dynamo] always run eager checkpoint region's recomputation in eager · pytorch/pytorch@eeeafba

Commit eeeafba

[ca][dynamo] always run eager checkpoint region's recomputation in eager
ghstack-source-id: 11023ed
Pull Request resolved: #153300
1 parent eb4eada commit eeeafba

File tree

3 files changed: +28, -19 lines changed

test/dynamo_expected_failures/TestAutograd.test_access_saved_tensor_twice_without_recomputation_works

Whitespace-only changes.

test/inductor/test_compiled_autograd.py

Lines changed: 11 additions & 19 deletions
@@ -4206,10 +4206,13 @@ def wrap_test_class(orig_cls):
         ):
             dct[name] = unittest.expectedFailure
         elif name.startswith("test_"):
+            backend = lookup_backend(name)
+            if not HAS_CUDA and backend == "inductor":
+                continue
             ctxs = [
                 compiled_autograd._enable(
                     make_compiler_fn(
-                        backend=lookup_backend(name),
+                        backend=backend,
                         fullgraph=name not in known_graph_breaks_tests,
                     )
                 ),
@@ -4302,6 +4305,8 @@ def wrap_test_class(orig_cls):
     "test_full_backward_hook_double_backward",  # _pack_with_none
     "test_grad_mode_restored_reentrant",  # assertTrue
     "test_multi_grad_any_hooks",  # register_multi_grad_hook
+    "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks
+    "test_graph_save_on_cpu",  # dynamo disabled
 }
 
 test_contexts = {
@@ -4351,37 +4356,27 @@ def wrap_test_class(orig_cls):
         "test_custom_autograd_no_early_free",  # batched gradients
         "test_lobpcg",  # NaNs
         # Uncategorized
+        "test_autograd_simple_views_python",  # gradient is None
+        "test_function_returns_undefined_tensor",  # gradient is None
+        "test_input_buffer_accum",  # add(sparse, dense) not supported
     },
     "eager": {  # will be run without torch.compiling the CA graph
         "test_setup_context_when_forward_has_default_args",  # autograd.Function with class methods
         "test_accumulate_grad_tensor_reference",  # Out of bounds: frame_state_entry.stride[i] is None
         "test_custom_function_exception",  # torch.no_grad(), torch._dynamo.exc.Unsupported: missing: WITH_EXCEPT_START
         "test_to_sparse_backward",  # Out of bounds: frame_state_entry.stride[i] is None
-        "test_autograd_simple_views_python",  # gradient is None
-        "test_function_returns_undefined_tensor",  # gradient is None
         "test_naughty_autograd_function_stashing_ctx",  # bytecode issue
         "test_unrelated_inputs",  # gradient batching rule not implemented for aten::sym_size.int
         "test_custom_function_non_tensor_inputs_outputs",  # gradient batching rule not implemented for aten::sym_size.int
         "test_return_duplicate",  # gradient batching rule not implemented for aten::sym_size.int
         "test_return_duplicate_inplace",  # gradient batching rule not implemented for aten::sym_size.int
         "test_setitem",  # CopySlices accuracy error
-        "test_save_on_cpu_and_checkpoint",  # https://github.com/pytorch/pytorch/issues/147565
-        "test_checkpoint_detects_non_determinism",  # different error
-        "test_checkpointing_non_reentrant_autocast_cpu",  # saved != recompute
-        "test_checkpointing_non_reentrant_autocast_gpu",  # saved != recompute
         "test_checkpointing_without_reentrant_saved_object_identity",  # same as https://github.com/pytorch/pytorch/issues/136193
-        "test_saved_variable_packing_unpacking_did_not_save_original_with_hooks",  # register_hooks multiple times
-        "test_saved_variable_saved_original_inplace_detach",  # RuntimeError not raised
-        "test_access_saved_tensor_twice_without_recomputation_works",  # saved != recompute
-        "test_checkpointing_without_reentrant_dataparallel",  # https://github.com/pytorch/pytorch/issues/127115
-        "test_checkpointing",  # takes very very long
-        "test_checkpointing_without_reentrant_input_requires_grad_False",  # takes very very long
-        "test_checkpointing_without_reentrant_input_requires_grad_True",  # takes very very long
-        "test_checkpointing_without_reentrant_memory_savings",  # takes very very long
         "test_dtensor_different_gradient_placement",  # Dynamo failed to run FX node with fake tensors
         "test_dtensor_noncontiguous_output",  # Dynamo failed to run FX node with fake tensors
         "test_dtensor_partial_placement_graph_output",  # Dynamo failed to run FX node with fake tensors
         "test_unwrap_async_collective_tensor_tangent",  # AttributeError: 'PlainTensorMeta' object has no attribute 'attrs'
+        "test_graph_save_on_cpu",  # PGO strides check out of bounds
     },
     "aot_eager": {  # will be run with torch.compile(backend="eager")
         # Category: FakeTensor
@@ -4390,10 +4385,7 @@ def wrap_test_class(orig_cls):
         "test_grad_batched_grad",  # torch._subclasses.fake_tensor.UnsupportedFakeTensorException: meta converter nyi
         "test_scalar_grad_mixed_device",  # Fake Tensors aren't propagating device properly for 0-dim grads
     },
-    "inductor": {  # will be run with torch.compile(backend="aot_eager")
-        "test_input_buffer_accum",  # does not support sparse_grad=True: https://github.com/pytorch/pytorch/issues/120267
-        "test_graph_save_on_cpu",  # does not support pin_memory: https://github.com/pytorch/pytorch/issues/134173
-    },
+    "inductor": {},  # will be run with torch.compile(backend="aot_eager")
     # tests not present in this dict will be run with torch.compile(backend="inductor")
 }
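As a reading aid, here is a minimal, hypothetical standalone sketch of the selection logic this hunk changes: the wrapper now resolves each test's backend once via lookup_backend and skips inductor-backed tests on machines without CUDA. The dict and its name below are trimmed stand-ins for the real per-backend table in test_compiled_autograd.py, not the actual harness.

import torch

HAS_CUDA = torch.cuda.is_available()

# Trimmed stand-in for the per-backend test table (hypothetical name/content).
tests_by_backend = {
    "eager": {"test_setitem"},
    "aot_eager": {"test_grad_batched_grad"},
    "inductor": set(),
}

def lookup_backend(test_name: str) -> str:
    # Mirrors the lookup in the diff: tests not listed anywhere default to inductor.
    for backend, tests in tests_by_backend.items():
        if test_name in tests:
            return backend
    return "inductor"

for name in ("test_setitem", "test_checkpointing"):
    backend = lookup_backend(name)
    if not HAS_CUDA and backend == "inductor":
        # New behavior: inductor-backed tests are skipped entirely on CPU-only
        # machines instead of being wrapped and failing at runtime.
        print(f"skipping {name} (inductor backend needs CUDA here)")
        continue
    print(f"{name} -> torch.compile backend: {backend}")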

torch/utils/checkpoint.py

Lines changed: 17 additions & 0 deletions
@@ -328,6 +328,7 @@ def backward(ctx, *args):
 def noop_context_fn():
     return contextlib.nullcontext(), contextlib.nullcontext()
 
+# Note: [torch.compile and checkpoint]
 # TorchDynamo does not step inside utils.checkpoint function. The flow
 # looks likes this
 #  1) TorchDynamo tries to wrap utils.checkpoint in a HigherOrderOp by
@@ -1106,6 +1107,8 @@ def pack_hook(x):
                     frame.x_metadatas.append(frame.metadata_fn(x))
             return holder
 
+        # See Note: [compiled autograd and checkpoint unpack hook]
+        @torch._disable_dynamo
         def unpack_hook(holder):
             gid = torch._C._current_graph_task_id()
             if gid == -1:
@@ -1541,3 +1544,17 @@ def recompute_fn(*inputs):
             )
 
     return
+
+# Note: [compiled autograd and checkpoint unpack hook]
+# When tracing via compiled autograd, this hook will be visible to the
+# compiler if the forward of this checkpointed region ran in eager.
+# If the forward had run under compile, it would have been wrapped in a
+# higher order op. See Note: [torch.compile and checkpoint].
+#
+# Since we run the recomputation hook under an enable_grad context,
+# AOTDispatch will trace a joint graph for this hook, and may
+# save different activations than in eager. This conflicts with the
+# strict activation count checks in `frame.check_recomputed_tensors_match`.
+# So, we disable this hook to force it to recompute eager checkpointed regions
+# in eager. This could be removed if we can disable the partitioner for this
+# graph segment.
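To make the note concrete, here is a hedged usage sketch of the scenario it describes, assuming the private torch._dynamo.compiled_autograd._enable API that the test harness above uses (not a public API): the checkpointed forward runs in eager, and during backward under compiled autograd the @torch._disable_dynamo-wrapped unpack hook keeps the recomputation in eager rather than letting AOTDispatch trace and re-partition it.

import torch
from torch._dynamo import compiled_autograd
from torch.utils.checkpoint import checkpoint

def region(x):
    # Activations of this block are dropped and recomputed during backward.
    return torch.sin(x) * torch.cos(x)

def compiler_fn(gm):
    # Compile the compiled-autograd graph with the eager backend for illustration.
    return torch.compile(gm, backend="eager")

x = torch.randn(8, requires_grad=True)

# The forward runs in eager, so the checkpoint is not wrapped in a higher order
# op and its unpack hook stays visible to compiled autograd (see the note above).
loss = checkpoint(region, x, use_reentrant=False).sum()

with compiled_autograd._enable(compiler_fn):
    # Because unpack_hook is decorated with @torch._disable_dynamo, the
    # recomputation of `region` runs in eager, so the activation counts match
    # what frame.check_recomputed_tensors_match expects.
    loss.backward()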

0 commit comments