#if CUDA_VERSION >= 11040 · pytorch/pytorch@4cbde6f · GitHub
[go: up one dir, main page]

Skip to content

Commit 4cbde6f

Browse files
committed
#if CUDA_VERSION >= 11040
1 parent 09709d5 commit 4cbde6f

File tree

4 files changed

+7
-7
lines changed

4 files changed

+7
-7
lines changed

aten/src/ATen/cuda/PeerToPeerAccess.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ bool get_p2p_access(int dev, int dev_to_access) {
3939
dev_to_access, " is not a device");
4040
TORCH_INTERNAL_ASSERT(num_devices_ >= 0, "p2p access cache not initialized");
4141

42-
#if CUDA_VERSION > 11040
42+
#if CUDA_VERSION >= 11040
4343
static bool using_cudaMallocAsync = std::strcmp(CUDACachingAllocator::allocatorBackend(),
4444
"cudaMallocAsync") == 0;
4545
#endif
@@ -55,7 +55,7 @@ bool get_p2p_access(int dev, int dev_to_access) {
5555
int access = 0;
5656
C10_CUDA_CHECK(cudaDeviceCanAccessPeer(&access, dev, dev_to_access));
5757
if (access) {
58-
#if CUDA_VERSION > 11040
58+
#if CUDA_VERSION >= 11040
5959
if (using_cudaMallocAsync) {
6060
// cudaMallocAsync pools are unaffected by cudaDeviceEnablePeerAccess.
6161
// We need pool-specific enablement. See
@@ -78,7 +78,7 @@ bool get_p2p_access(int dev, int dev_to_access) {
7878
} else {
7979
C10_CUDA_CHECK(err);
8080
}
81-
#if CUDA_VERSION > 11040
81+
#if CUDA_VERSION >= 11040
8282
}
8383
#endif
8484
cache = 1;

aten/src/ATen/native/cuda/Copy.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void copy_device_to_device(TensorIterator& iter,
9393
void *src = iter.data_ptr(1);
9494
size_t size = numel * iter.element_size(0);
9595
if (src != dst || src_device != dst_device) {
96-
#if CUDA_VERSION > 11040
96+
#if CUDA_VERSION >= 11040
9797
// Due to bizarre cuda driver intricacies, copies of
9898
// cudaMallocAsynced memory between devices that aren't
9999
// peer-to-peer-capable need "cudaMemcpyPeerAsync".
@@ -113,7 +113,7 @@ void copy_device_to_device(TensorIterator& iter,
113113
dst, src, size,
114114
cudaMemcpyDeviceToDevice,
115115
copy_stream));
116-
#if CUDA_VERSION > 11040
116+
#if CUDA_VERSION >= 11040
117117
}
118118
#endif
119119
}

c10/cuda/CUDACachingAllocator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1660,7 +1660,7 @@ void parseArgs() {
16601660
m_allocator_backend = kv[1];
16611661
used_cudaMallocAsync = (kv[1].compare("cudaMallocAsync") == 0);
16621662
if (used_cudaMallocAsync) {
1663-
#if CUDA_VERSION > 11040
1663+
#if CUDA_VERSION >= 11040
16641664
int version;
16651665
C10_CUDA_CHECK(cudaDriverGetVersion(&version));
16661666
TORCH_CHECK(version >= 11040,

c10/cuda/CUDAMallocAsyncAllocator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace cuda {
1515
namespace CUDACachingAllocator {
1616
namespace CudaMallocAsync {
1717

18-
#if CUDA_VERSION > 11040
18+
#if CUDA_VERSION >= 11040
1919
// CUDA device allocator that uses cudaMallocAsync to implement
2020
// the same interface as CUDACachingAllocator.cpp.
2121

0 commit comments

Comments (0)