test_out_of_memory_retry · pytorch/pytorch@a208bdb · GitHub

Commit a208bdb

test_out_of_memory_retry

1 parent a240ec8 commit a208bdb

File tree: 2 files changed, +12 -1 lines changed

c10/cuda/CUDACachingAllocator.cpp

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ class DeviceCachingAllocator {
   //
   // Q. Why skip process_events if a capture might be underway?
   // A. process_events involves cudaEventQueries, illegal during CUDA graph
-  // capture.
+  // capture.
   // Dumb simple solution: defer reclaiming these allocations until after
   // capture. Cross-stream memory use is uncommon, so the deferral's
   // effect on memory use during capture should be small.
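
The comment being touched explains why the allocator skips process_events while a CUDA graph capture might be underway: cudaEventQuery is not allowed during capture, so reclaiming cross-stream blocks is deferred until capture ends. As a rough, illustrative sketch of that capture constraint from the Python side (assuming a CUDA build with graph support and following the documented warmup-then-capture pattern; tensor shapes here are arbitrary):

    import torch

    # Sketch only: record a simple op into a CUDA graph. While capture is in
    # flight, event queries and synchronization are disallowed, which is why
    # the caching allocator defers reclaiming cross-stream blocks until after
    # capture.
    static_input = torch.zeros(8, device='cuda')

    # Warm up on a side stream before capturing, per the CUDA graphs docs.
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        static_output = static_input * 2
    torch.cuda.current_stream().wait_stream(s)

    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g):
        static_output = static_input * 2  # captured, not executed here

    static_input.fill_(3.0)
    g.replay()              # runs the captured work
    print(static_output)    # expected: a tensor of 6s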

test/test_cuda.py

Lines changed: 11 additions & 0 deletions
@@ -377,6 +377,17 @@ def test_out_of_memory(self):
         tensor.fill_(1)
         self.assertTrue((tensor == 1).all())

+    def test_out_of_memory_retry(self):
+        total_memory = torch.cuda.get_device_properties(0).total_memory
+        oom_regex = "would exceed allowed memory" if TEST_CUDAMALLOCASYNC else \
+            "Tried to allocate"
+        size = int(total_memory * 0.5)
+        a = torch.empty(size, dtype=torch.int8, device='cuda')
+        with self.assertRaisesRegex(RuntimeError, oom_regex):
+            b = torch.empty(size, dtype=torch.int8, device='cuda')
+        del a
+        b = torch.empty(size, dtype=torch.int8, device='cuda')
+
     def test_set_per_process_memory_fraction(self):
         # test invalid fraction value.
         with self.assertRaisesRegex(TypeError, "Invalid type"):
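
The new test exercises the allocator's OOM retry path: the second half-of-device-memory allocation fails while the first tensor is alive, then succeeds once that tensor is freed. A standalone sketch of the same behavior outside the unittest harness (an illustration only: it assumes a CUDA device with no other significant memory users; sizes and dtype mirror the test above):

    import torch

    # Sketch of the behavior covered by test_out_of_memory_retry: the second
    # allocation OOMs while `a` is alive, then succeeds after `a` is freed and
    # the caching allocator can reuse the block.
    total_memory = torch.cuda.get_device_properties(0).total_memory
    size = int(total_memory * 0.5)

    a = torch.empty(size, dtype=torch.int8, device='cuda')
    try:
        b = torch.empty(size, dtype=torch.int8, device='cuda')
    except RuntimeError as e:
        print("first attempt raised as expected:", type(e).__name__)

    del a  # free the first block so the allocator can satisfy the retry
    b = torch.empty(size, dtype=torch.int8, device='cuda')  # now succeeds
    print("second attempt allocated", b.numel(), "bytes")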

0 commit comments