before i forget · pytorch/pytorch@b13f118 · GitHub
[go: up one dir, main page]

Skip to content

Commit b13f118

Browse files
committed
before i forget
1 parent 363fe3c commit b13f118

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

c10/cuda/CUDAMallocAsyncAllocator.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,15 @@ void free(void* ptr) {
263263
"ptr's stream uses vector is empty");
264264

265265
if (C10_UNLIKELY(capture_underway)) {
266-
if (it->second.captured) {
266+
if (!it->second.captured) {
267+
TORCH_WARN_ONCE("free() was called on an uncaptured allocation during graph capture. "
268+
"This may be benign, for example, a Python tensor in the capture "
269+
"might happen to shadow (use the same name as) an unrelated temporary "
270+
"tensor from somewhere before capture, pushing the earlier tensor "
271+
"out of scope.\n"
272+
"However, if the tensor we're freeing here IS used by the capture, "
273+
"freeing it is an error, and may cause illegal memory accesses or "
274+
"memory corruption during graph replay.");
267275
// See Note [Avoid freeing uncaptured ptrs during CUDA graph capture]
268276
// Remembers the raw pointer, not the iterator.
269277
// This forces notifyCaptureEnded to do another lookup,
@@ -272,9 +280,7 @@ void free(void* ptr) {
272280
ungraphed_ptrs_defer_free_until_no_capture.push_back(ptr);
273281
return;
274282
}
275-
}
276-
277-
if (C10_UNLIKELY(it->second.captured)) {
283+
} else if (C10_UNLIKELY(it->second.captured)) {
278284
TORCH_WARN("Attempting uncaptured free of a captured allocation. "
279285
"This is technically allowed, but may indicate you are losing "
280286
"the last user-visible tensor through which the allocation can "

torch/csrc/autograd/engine.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -583,7 +583,7 @@ void GraphTask::exec_post_processing() {
583583
// surrounding the user's call to backward()). This has two benefits:
584584
// 1. caller_current_streams have been synced with leaf_streams, so callbacks may
585585
// safely access any grad.
586-
// 2. The callback's results can safely be used on (user-facing) caller_current_streams
586+
// 2. The callbacks' results can safely be used on (user-facing) caller_current_streams
587587
// after backward().
588588
c10::MultiStreamGuard g(caller_current_streams_filtered);
589589

0 commit comments

Comments
 (0)
0