From 365a8936825aa6511395781de4a98d81c6759bf8 Mon Sep 17 00:00:00 2001
From: Grider
Date: Fri, 12 Jul 2024 23:35:19 +0100
Subject: [PATCH] Actually use split_mode from model settings

---
 llama_cpp/server/model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py
index ad39c1004..c486f8885 100644
--- a/llama_cpp/server/model.py
+++ b/llama_cpp/server/model.py
@@ -223,6 +223,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         **kwargs,
         # Model Params
         n_gpu_layers=settings.n_gpu_layers,
+        split_mode=settings.split_mode,
         main_gpu=settings.main_gpu,
         tensor_split=settings.tensor_split,
         vocab_only=settings.vocab_only,