Closed
Description
Discovered with #126341.
Tests that fail when run with PYTORCH_TEST_WITH_DYNAMO=1:
PYTORCH_TEST_WITH_DYNAMO=1 pytest test/test_autograd.py::TestAutograd::test_checkpointing_without_reentrant_arbitrary_input_output
and
PYTORCH_TEST_WITH_DYNAMO=1 pytest test/test_autograd.py::TestAutograd::test_checkpointing_without_reentrant_dataparallel
(fails on CI; I cannot reproduce the latter locally).
Logs:
Traceback (most recent call last):
File "/data/users/williamwen/pytorch2/torch/_dynamo/backends/debugging.py", line 33, in inner
return gm(*args)
File "/data/users/williamwen/pytorch2/torch/fx/graph_module.py", line 736, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
File "/data/users/williamwen/pytorch2/torch/fx/graph_module.py", line 315, in __call__
raise e
File "/data/users/williamwen/pytorch2/torch/fx/graph_module.py", line 302, in __call__
return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
File "/data/users/williamwen/pytorch2/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/users/williamwen/pytorch2/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "<eval_with_key>.1", line 7, in forward
linear = torch._C._nn.linear(l_dict_input_tensor_, l_self_layer_weight, None); l_dict_input_tensor_ = l_self_layer_weight = None
File "/data/users/williamwen/pytorch2/torch/utils/checkpoint.py", line 1077, in pack_hook
raise _StopRecomputationError
torch.utils.checkpoint._StopRecomputationError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data/users/williamwen/py310-env/lib/python3.10/unittest/case.py", line 59, in testPartExecutor
yield
File "/data/users/williamwen/py310-env/lib/python3.10/unittest/case.py", line 591, in run
self._callTestMethod(testMethod)
File "/data/users/williamwen/py310-env/lib/python3.10/unittest/case.py", line 549, in _callTestMethod
method()
File "/data/users/williamwen/pytorch2/torch/testing/_internal/common_utils.py", line 2756, in wrapper
method(*args, **kwargs)
File "/data/users/williamwen/pytorch2/test/test_autograd.py", line 7069, in test_checkpointing_without_reentrant_arbitrary_input_output
out_checkpoint.backward()
File "/data/users/williamwen/pytorch2/torch/_tensor.py", line 523, in backward
torch.autograd.backward(
File "/data/users/williamwen/pytorch2/torch/autograd/__init__.py", line 267, in backward
_engine_run_backward(
File "/data/users/williamwen/pytorch2/torch/autograd/graph.py", line 767, in _engine_run_backward
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/data/users/williamwen/pytorch2/torch/utils/checkpoint.py", line 1115, in unpack_hook
frame.recompute_fn(*args)
File "/data/users/williamwen/pytorch2/torch/utils/checkpoint.py", line 1401, in recompute_fn
fn(*args, **kwargs)
File "/data/users/williamwen/pytorch2/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data/users/williamwen/pytorch2/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/data/users/williamwen/pytorch2/test/test_autograd.py", line 7051, in forward
def forward(self, dict_input):
File "/data/users/williamwen/pytorch2/torch/_dynamo/eval_frame.py", line 548, in _fn
return fn(*args, **kwargs)
File "/data/users/williamwen/pytorch2/torch/_dynamo/backends/debugging.py", line 35, in inner
raise torch._dynamo.exc.TorchDynamoException(
torch._dynamo.exc.TorchDynamoException: Unexpected exception when running generated GraphModule
cc @chauhang @penguinwu @voznesenskym @EikanWang @jgong5 @Guobing-Chen @XiaobingSuper @zhuhaozhe @blzheng @wenzhe-nrv @jiayisunx @chenyang78 @kadeng @amjames @ezyang @msaroufim @bdhirsh @anijain2305