Cleanup and fix minor errors · pytorch/pytorch@00da106 · GitHub
[go: up one dir, main page]

Skip to content

Commit 00da106

Browse files
authored
Cleanup and fix minor errors
1 parent c68d7e5 commit 00da106

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

aten/src/ATen/native/cuda/CUDALoops.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -188,9 +188,8 @@ __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
188188
elementwise_kernel_helper(f, policy);
189189
} else { // if this block has a full `block_work_size` data to handle, use
190190
// vectorized memory access
191-
constexpr auto optimal_vec_size = vec_size;
192191
elementwise_kernel_helper(
193-
f, memory::policies::vectorized<optimal_vec_size, array_t, elems_per_thread<io_size>()>(data));
192+
f, memory::policies::vectorized<vec_size, array_t, elems_per_thread<io_size>()>(data));
194193
}
195194
#endif // __CUDA_ARCH__ == 900 || __CUDA_ARCH__ == 1000
196195
} else {
@@ -215,9 +214,8 @@ __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
215214
elementwise_kernel_helper(f, policy);
216215
} else { // if this block has a full `block_work_size` data to handle, use
217216
// vectorized memory access
218-
constexpr auto optimal_vec_size = vec_size;
219217
elementwise_kernel_helper(
220-
f, memory::policies::vectorized<optimal_vec_size, array_t, elems_per_thread<io_size>()>(data));
218+
f, memory::policies::vectorized<vec_size, array_t, elems_per_thread<io_size>()>(data));
221219
}
222220
}
223221
}
@@ -248,6 +246,8 @@ __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
248246
} else { // if this block has a full `block_work_size` data to handle, use
249247
// vectorized memory access
250248
constexpr auto optimal_vec_size = calc_optimal_vec_size<vec_size, io_size>();
249+
elementwise_kernel_helper(
250+
f, memory::policies::vectorized<optimal_vec_size, array_t, elems_per_thread<io_size>()>(data));
251251
}
252252
}
253253
#endif // USE_ROCM

0 commit comments

Comments
 (0)
0