Update llama.cpp · sjanaX01/llama-cpp-python@89cce50 · GitHub
[go: up one dir, main page]

Skip to content

Commit 89cce50

Browse files
committed
Update llama.cpp
1 parent b8fc1c7 commit 89cce50

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ def _load_shared_library(lib_base_name: str):
9191
c_uint8_p = POINTER(c_uint8)
9292
c_size_t_p = POINTER(c_size_t)
9393

94+
# from ggml-backend.h
95+
# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
96+
ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(
97+
c_bool, c_void_p, c_bool, c_void_p
98+
)
99+
94100
# llama.h bindings
95101

96102
_lib.llama_max_devices.argtypes = []
@@ -448,6 +454,9 @@ class llama_model_params(Structure):
448454
# float yarn_beta_slow; // YaRN high correction dim
449455
# uint32_t yarn_orig_ctx; // YaRN original context size
450456

457+
# ggml_backend_sched_eval_callback cb_eval;
458+
# void * cb_eval_user_data;
459+
451460
# enum ggml_type type_k; // data type for K cache
452461
# enum ggml_type type_v; // data type for V cache
453462

@@ -475,6 +484,8 @@ class llama_context_params(Structure):
475484
yarn_beta_fast (float): YaRN low correction dim
476485
yarn_beta_slow (float): YaRN high correction dim
477486
yarn_orig_ctx (int): YaRN original context size
487+
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
488+
cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
478489
type_k (int): data type for K cache
479490
type_v (int): data type for V cache
480491
mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
@@ -497,6 +508,8 @@ class llama_context_params(Structure):
497508
("yarn_beta_fast", c_float),
498509
("yarn_beta_slow", c_float),
499510
("yarn_orig_ctx", c_uint32),
511+
("cb_eval", ggml_backend_sched_eval_callback),
512+
("cb_eval_user_data", c_void_p),
500513
("type_k", c_int),
501514
("type_v", c_int),
502515
("mul_mat_q", c_bool),

vendor/llama.cpp

0 commit comments

Comments
 (0)
</div>
0