Before I forget

mcarilli · mcarilli · commit b13f118c00aa · 2022-01-01T14:12:49.000-07:00
diff --git a/c10/cuda/CUDAMallocAsyncAllocator.cpp b/c10/cuda/CUDAMallocAsyncAllocator.cpp
@@ -263,7 +263,15 @@ void free(void* ptr) {
                         "ptr's stream uses vector is empty");
 
   if (C10_UNLIKELY(capture_underway)) {
-    if (it->second.captured) {
+    if (!it->second.captured) {
+      TORCH_WARN_ONCE("free() was called on an uncaptured allocation during graph capture. "
+                      "This may be benign, for example, a Python tensor in the capture "
+                      "might happen to shadow (use the same name as) an unrelated temporary "
+                      "tensor from somewhere before capture, pushing the earlier tensor "
+                      "out of scope.\n"
+                      "However, if the tensor we're freeing here IS used by the capture, "
+                      "freeing it is an error, and may cause illegal memory accesses or "
+                      "memory corruption during graph replay.");
       // See Note [Avoid freeing uncaptured ptrs during CUDA graph capture]
       // Remembers the raw pointer, not the iterator.
       // This forces notifyCaptureEnded to do another lookup,
@@ -272,9 +280,7 @@ void free(void* ptr) {
       ungraphed_ptrs_defer_free_until_no_capture.push_back(ptr);
       return;
     }
-  }
-
-  if (C10_UNLIKELY(it->second.captured)) {
+  } else if (C10_UNLIKELY(it->second.captured)) {
     TORCH_WARN("Attempting uncaptured free of a captured allocation. "
                "This is technically allowed, but may indicate you are losing "
                "the last user-visible tensor through which the allocation can "
diff --git a/torch/csrc/autograd/engine.cpp b/torch/csrc/autograd/engine.cpp
@@ -583,7 +583,7 @@ void GraphTask::exec_post_processing() {
     // surrounding the user's call to backward()). This has two benefits:
     //  1. caller_current_streams have been synced with leaf_streams, so callbacks may
     //     safely access any grad.
-    //  2. The callback's results can safely be used on (user-facing) caller_current_streams
+    //  2. The callbacks' results can safely be used on (user-facing) caller_current_streams
     //     after backward().
     c10::MultiStreamGuard g(caller_current_streams_filtered);