Update llama.cpp · trex99/llama-cpp-python@6473796

Commit 6473796

Update llama.cpp

1 parent 15ee210 commit 6473796

File tree

2 files changed: +7 -5 lines changed


llama_cpp/llama_cpp.py

Lines changed: 6 additions & 4 deletions
@@ -252,8 +252,8 @@ class llama_token_data_array(Structure):
 
 llama_token_data_array_p = POINTER(llama_token_data_array)
 
-# typedef void (*llama_progress_callback)(float progress, void *ctx);
-llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
+# typedef bool (*llama_progress_callback)(float progress, void *ctx);
+llama_progress_callback = ctypes.CFUNCTYPE(c_bool, c_float, c_void_p)
 
 
 # // Input data for llama_decode
@@ -347,7 +347,9 @@ class llama_model_kv_override(Structure):
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
 # const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
-# // called with a progress value between 0 and 1, pass NULL to disable
+# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+# // If the provided progress_callback returns true, model loading continues.
+# // If it returns false, model loading is immediately aborted.
 # llama_progress_callback progress_callback;
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;
@@ -367,7 +369,7 @@ class llama_model_params(Structure):
         n_gpu_layers (int): number of layers to store in VRAM
         main_gpu (int): the GPU that is used for scratch and small tensors
         tensor_split (ctypes.Array[ctypes.c_float]): how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
-        progress_callback (llama_progress_callback): called with a progress value between 0 and 1, pass NULL to disable
+        progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
         progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
         kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
         vocab_only (bool): only load the vocabulary, no weights
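
With the return type changed from None to c_bool, a registered progress callback can now cancel a model load in flight. Below is a minimal sketch of the new contract, assuming the low-level llama_cpp bindings as of this commit; the model path and the 50% abort threshold are illustrative, not part of the commit.

import ctypes
from llama_cpp import llama_cpp

# Wrap a Python function in the CFUNCTYPE prototype defined above.
@llama_cpp.llama_progress_callback
def on_progress(progress, ctx):
    print(f"loading: {progress * 100:.0f}%")
    # True -> keep loading; False -> abort the load immediately.
    return progress <= 0.5

params = llama_cpp.llama_model_default_params()
params.progress_callback = on_progress

# llama_load_model_from_file returns NULL when the callback aborts the load.
model = llama_cpp.llama_load_model_from_file(b"models/example.gguf", params)
if not model:
    print("model load aborted by progress callback")

Note that the CFUNCTYPE object (on_progress here) must stay referenced for as long as the load runs, or ctypes may garbage-collect the callback trampoline out from under the C code.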

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer updated)
