feat: Update llama.cpp · ducky777/llama-cpp-python@fdce078 · GitHub

Commit fdce078

feat: Update llama.cpp
1 parent c2a234a commit fdce078

File tree

llama_cpp/llama.py
llama_cpp/llama_cpp.py
llama_cpp/server/settings.py
vendor/llama.cpp

4 files changed: +44 −10 lines changed

llama_cpp/llama.py

Lines changed: 10 additions & 4 deletions
@@ -98,7 +98,7 @@ def __init__(
         lora_scale: float = 1.0,
         lora_path: Optional[str] = None,
         # Backend Params
-        numa: bool = False,
+        numa: Union[bool, int] = False,
         # Chat Format Params
         chat_format: Optional[str] = None,
         chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None,
@@ -166,7 +166,7 @@ def __init__(
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
-            numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
+            numa: numa policy
             chat_format: String specifying the chat format to use when calling create_chat_completion.
             chat_handler: Optional chat handler to use when calling create_chat_completion.
             draft_model: Optional draft model to use for speculative decoding.
@@ -183,12 +183,18 @@ def __init__(
 
         set_verbose(verbose)
 
-        self.numa = numa
         if not Llama.__backend_initialized:
             with suppress_stdout_stderr(disable=verbose):
-                llama_cpp.llama_backend_init(self.numa)
+                llama_cpp.llama_backend_init()
             Llama.__backend_initialized = True
 
+        if isinstance(numa, bool):
+            self.numa = llama_cpp.GGML_NUMA_STRATEGY_DISTRIBUTE if numa else llama_cpp.GGML_NUMA_STRATEGY_DISABLED
+
+        if self.numa != llama_cpp.GGML_NUMA_STRATEGY_DISABLED:
+            with suppress_stdout_stderr(disable=verbose):
+                llama_cpp.llama_numa_init(self.numa)
+
         self.model_path = model_path
 
         # Model Params
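
The net effect of this hunk: `llama_backend_init` no longer receives the NUMA flag, and a boolean `numa` argument is translated into a ggml_numa_strategy before `llama_numa_init` is called. A minimal usage sketch against the new constructor; the model path is illustrative and not part of this commit:

from llama_cpp import Llama

# numa=True now maps to GGML_NUMA_STRATEGY_DISTRIBUTE internally;
# numa=False maps to GGML_NUMA_STRATEGY_DISABLED (llama_numa_init is skipped).
llm = Llama(model_path="./models/model.gguf", numa=True)

Note that only the isinstance(numa, bool) branch in the hunk assigns self.numa, so the boolean form is the one this diff clearly supports; passing a raw strategy int relies on code outside the shown hunk.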

llama_cpp/llama_cpp.py

Lines changed: 31 additions & 3 deletions
@@ -697,17 +697,45 @@ def llama_model_quantize_default_params() -> llama_model_quantize_params:
 # // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
 # LLAMA_API void llama_backend_init(bool numa);
-def llama_backend_init(numa: Union[c_bool, bool]):
+# LLAMA_API void llama_backend_init(void);
+def llama_backend_init():
     """Initialize the llama + ggml backend
     If numa is true, use NUMA optimizations
     Call once at the start of the program"""
-    return _lib.llama_backend_init(numa)
+    return _lib.llama_backend_init()
 
 
-_lib.llama_backend_init.argtypes = [c_bool]
+_lib.llama_backend_init.argtypes = []
 _lib.llama_backend_init.restype = None
 
 
+# // numa strategies
+# enum ggml_numa_strategy {
+#     GGML_NUMA_STRATEGY_DISABLED   = 0,
+#     GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
+#     GGML_NUMA_STRATEGY_ISOLATE    = 2,
+#     GGML_NUMA_STRATEGY_NUMACTL    = 3,
+#     GGML_NUMA_STRATEGY_MIRROR     = 4,
+#     GGML_NUMA_STRATEGY_COUNT
+# };
+GGML_NUMA_STRATEGY_DISABLED = 0
+GGML_NUMA_STRATEGY_DISTRIBUTE = 1
+GGML_NUMA_STRATEGY_ISOLATE = 2
+GGML_NUMA_STRATEGY_NUMACTL = 3
+GGML_NUMA_STRATEGY_MIRROR = 4
+GGML_NUMA_STRATEGY_COUNT = 5
+
+
+# // optional:
+# LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa);
+def llama_numa_init(numa: int):
+    return _lib.llama_numa_init(numa)
+
+
+_lib.llama_numa_init.argtypes = [c_int]
+_lib.llama_numa_init.restype = None
+
+
 # // Call once at the end of the program - currently only used for MPI
 # LLAMA_API void llama_backend_free(void);
 def llama_backend_free():
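
These bindings split backend initialization from NUMA configuration, mirroring upstream llama.cpp. A minimal sketch of the new call order at the ctypes level, using only functions shown in this diff; the strategy choice is illustrative:

import llama_cpp

# Call once at program start; it no longer takes a NUMA flag.
llama_cpp.llama_backend_init()

# Optionally pick a NUMA policy before any model is loaded.
llama_cpp.llama_numa_init(llama_cpp.GGML_NUMA_STRATEGY_DISTRIBUTE)

# ... create models/contexts, run inference ...

# Call once at program end (currently only used for MPI).
llama_cpp.llama_backend_free()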

llama_cpp/server/settings.py

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 import multiprocessing
 
-from typing import Optional, List, Literal
+from typing import Optional, List, Literal, Union
 from pydantic import Field
 from pydantic_settings import BaseSettings
 
@@ -108,7 +108,7 @@ class ModelSettings(BaseSettings):
         description="Path to a LoRA file to apply to the model.",
     )
     # Backend Params
-    numa: bool = Field(
+    numa: Union[bool, int] = Field(
        default=False,
        description="Enable NUMA support.",
    )
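
With the widened field, the server settings accept either form. A sketch of direct instantiation, assuming ModelSettings' usual required `model` path field (an assumption; it is not part of this diff) and illustrative values:

from llama_cpp.server.settings import ModelSettings

# Boolean form, unchanged behavior:
settings = ModelSettings(model="./models/model.gguf", numa=True)

# Integer form: pass a ggml_numa_strategy value, e.g. 2 for
# GGML_NUMA_STRATEGY_ISOLATE (constants are defined in llama_cpp.llama_cpp).
settings = ModelSettings(model="./models/model.gguf", numa=2)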

vendor/llama.cpp

Submodule commit pointer updated (1 addition & 1 deletion).

0 commit comments