kompute : disable LLAMA_SPLIT_LAYER after ggerganov/llama.cpp#5321 · nomic-ai/llama.cpp@7cd9602 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7cd9602

Browse files
committed
kompute : disable LLAMA_SPLIT_LAYER after ggml-org#5321
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
1 parent 800e8fd commit 7cd9602

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/llama.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5903,6 +5903,7 @@ static bool llm_load_tensors(
 5903 5903             model.buft_layer[i] = llama_default_buffer_type_cpu(true);
 5904 5904         }
 5905 5905
      5906  +#ifndef GGML_USE_KOMPUTE
 5906 5907         if (split_mode == LLAMA_SPLIT_MODE_LAYER) {
 5907 5908             // calculate the split points
 5908 5909             int device_count = llama_get_device_count(model);
@@ -5940,7 +5941,9 @@ static bool llm_load_tensors(
 5940 5941         } else {
 5941 5942             model.buft_output = llama_default_buffer_type_cpu(true);
 5942 5943         }
 5943       -    } else {
      5944  +    } else
      5945  +#endif
      5946  +    {
 5944 5947         ggml_backend_buffer_type_t split_buft;
 5945 5948         if (split_mode == LLAMA_SPLIT_MODE_ROW) {
 5946 5949             split_buft = llama_default_buffer_type_split(model, main_gpu, tensor_split);

0 commit comments

Comments
 (0)
0