CUDA: fix race condition in FlashAttention (FA) vector kernels (ggml-org#13742)

JohannesGaessler · web-flow · commit ffd0eae60b76 · 2025-05-24T11:46:19.000+02:00
diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
                 }
             }
             if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                 continue;
             }
 #endif // GGML_USE_HIP
diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
                 }
             }
             if (__all_sync(0xFFFFFFFF, skip)) {
+                __syncthreads();
                 continue;
             }
 #endif // GGML_USE_HIP

Original file line number	Diff line number	Diff line change
`@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(`
`212`	`212`	`}`
`213`	`213`	`}`
`214`	`214`	`if (__all_sync(0xFFFFFFFF, skip)) {`
	`215`	`+ __syncthreads();`
`215`	`216`	`continue;`
`216`	`217`	`}`
`217`	`218`	`#endif // GGML_USE_HIP`
Original file line number	Diff line number	Diff line change
`@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(`
`217`	`217`	`}`
`218`	`218`	`}`
`219`	`219`	`if (__all_sync(0xFFFFFFFF, skip)) {`
	`220`	`+ __syncthreads();`
`220`	`221`	`continue;`
`221`	`222`	`}`
`222`	`223`	`#endif // GGML_USE_HIP`