CUDA: fix race condition in FA vector kernels (#13742) · ochafik/llama.cpp@ffd0eae · GitHub
[go: up one dir, main page]

Skip to content

Commit ffd0eae

Browse files
CUDA: fix race condition in FA vector kernels (ggml-org#13742)
1 parent b775345 commit ffd0eae

File tree

2 files changed

+2
-0
lines changed

2 files changed

+2
-0
lines changed

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
212212
}
213213
}
214214
if (__all_sync(0xFFFFFFFF, skip)) {
215+
__syncthreads();
215216
continue;
216217
}
217218
#endif // GGML_USE_HIP

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
217217
}
218218
}
219219
if (__all_sync(0xFFFFFFFF, skip)) {
220+
__syncthreads();
220221
continue;
221222
}
222223
#endif // GGML_USE_HIP

0 commit comments

Comments
 (0)
0