File tree: 2 files changed, +12 -1
lines changed Original file line number Diff line number Diff line change @@ -370,7 +370,7 @@ class DeviceCachingAllocator {
370
370
//
371
371
// Q. Why skip process_events if a capture might be underway?
372
372
// A. process_events involves cudaEventQueries, illegal during CUDA graph
373
- // capture.
373
+ // capture.
374
374
// Dumb simple solution: defer reclaiming these allocations until after
375
375
// capture. Cross-stream memory use is uncommon, so the deferral's
376
376
// effect on memory use during capture should be small.
Original file line number Diff line number Diff line change @@ -377,6 +377,17 @@ def test_out_of_memory(self):
377
377
tensor .fill_ (1 )
378
378
self .assertTrue ((tensor == 1 ).all ())
379
379
380
+ def test_out_of_memory_retry (self ):
381
+ total_memory = torch .cuda .get_device_properties (0 ).total_memory
382
+ oom_regex = "would exceed allowed memory" if TEST_CUDAMALLOCASYNC else \
383
+ "Tried to allocate"
384
+ size = int (total_memory * 0.5 )
385
+ a = torch .empty (size , dtype = torch .int8 , device = 'cuda' )
386
+ with self .assertRaisesRegex (RuntimeError , oom_regex ):
387
+ b = torch .empty (size , dtype = torch .int8 , device = 'cuda' )
388
+ del a
389
+ b = torch .empty (size , dtype = torch .int8 , device = 'cuda' )
390
+
380
391
def test_set_per_process_memory_fraction (self ):
381
392
# test invalid fraction value.
382
393
with self .assertRaisesRegex (TypeError , "Invalid type" ):
You can’t perform that action at this time.
0 commit comments