Update llama.cpp · coderonion/llama-cpp-python@50f5c74

Commit 50f5c74

Update llama.cpp

1 parent 43ba152 · commit 50f5c74

File tree: 2 files changed, +7 −1 lines changed

llama_cpp/llama_cpp.py

Lines changed: 6 additions & 0 deletions
@@ -648,6 +648,9 @@ class llama_model_kv_override(ctypes.Structure):
 # // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
 # const float * tensor_split;
 
+# // comma separated list of RPC servers to use for offloading
+# const char * rpc_servers;
+
 # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
 # // If the provided progress_callback returns true, model loading continues.
 # // If it returns false, model loading is immediately aborted.
@@ -674,6 +677,7 @@ class llama_model_params(ctypes.Structure):
         split_mode (int): how to split the model across multiple GPUs
         main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
         tensor_split (ctypes.Array[ctypes.ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
+        rpc_servers (ctypes.c_char_p): comma separated list of RPC servers to use for offloading
         progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
         progress_callback_user_data (ctypes.ctypes.c_void_p): context pointer passed to the progress callback
         kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
@@ -687,6 +691,7 @@ class llama_model_params(ctypes.Structure):
     split_mode: int
     main_gpu: int
     tensor_split: CtypesArray[ctypes.c_float]
+    rpc_servers: ctypes.c_char_p
     progress_callback: Callable[[float, ctypes.c_void_p], bool]
     progress_callback_user_data: ctypes.c_void_p
     kv_overrides: CtypesArray[llama_model_kv_override]
@@ -700,6 +705,7 @@ class llama_model_params(ctypes.Structure):
         ("split_mode", ctypes.c_int),
         ("main_gpu", ctypes.c_int32),
         ("tensor_split", ctypes.POINTER(ctypes.c_float)),
+        ("rpc_servers", ctypes.c_char_p),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", ctypes.c_void_p),
         ("kv_overrides", ctypes.POINTER(llama_model_kv_override)),

vendor/llama.cpp (submodule pointer updated: +1 −1)
