use stride=128 if built for tensor cores · Pints-AI/llama.cpp@6272b67 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6272b67

Browse files
committed
use stride=128 if built for tensor cores
1 parent dd71a35 commit 6272b67

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

ggml-cuda.cu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,12 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
468 468
#endif // GGML_CUDA_PEER_MAX_BATCH_SIZE
469 469

470 470
#define MUL_MAT_SRC1_COL_STRIDE_MMQ 128
471-
#define MUL_MAT_SRC1_COL_STRIDE 4096
471+
472+
#ifdef CUDA_USE_TENSOR_CORES
473+
#define MUL_MAT_SRC1_COL_STRIDE 128
474+
#else
475+
#define MUL_MAT_SRC1_COL_STRIDE 4096
476+
#endif
472 477

473 478
#define MAX_STREAMS 8
474 479
static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_DEVICES][MAX_STREAMS] = { { nullptr } };

0 commit comments

Comments
 (0)
0