8000 kompute : disable LLAMA_SPLIT_LAYER after ggerganov/llama.cpp#5321 · nomic-ai/llama.cpp@fad3795 · GitHub
[go: up one dir, main page]

Skip to content

Commit fad3795

Browse files
committed
kompute : disable LLAMA_SPLIT_LAYER after ggml-org#5321
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
1 parent 5a648da commit fad3795

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

llama.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4741,6 +4741,7 @@ static bool llm_load_tensors(
4741  4741         model.buft_layer[i] = llama_default_buffer_type_cpu(true);
4742  4742     }
4743  4743
      4744  +  #ifndef GGML_USE_KOMPUTE
4744  4745     if (split_mode == LLAMA_SPLIT_MODE_LAYER) {
4745  4746         // calculate the split points
4746  4747         int device_count = llama_get_device_count();
@@ -4778,7 +4779,9 @@ static bool llm_load_tensors(
4778  4779         } else {
4779  4780             model.buft_output = llama_default_buffer_type_cpu(true);
4780  4781         }
4781        -  } else {
      4782  +  } else
      4783  +  #endif
      4784  +  {
4782  4785         ggml_backend_buffer_type_t split_buft;
4783  4786         if (split_mode == LLAMA_SPLIT_MODE_ROW) {
4784  4787             split_buft = llama_default_buffer_type_split(main_gpu, tensor_split);

0 commit comments

Comments
 (0)
0