feat: Update llama.cpp · richdougherty/llama-cpp-python@fea33c9 · GitHub
[go: up one dir, main page]

Skip to content

Commit fea33c9

Browse files
committed
feat: Update llama.cpp
1 parent 4d574bd commit fea33c9

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
264264
# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
265265
# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
266266
# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
267+
# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
267268

268269
# LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
269270
# };
@@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
295296
LLAMA_FTYPE_MOSTLY_IQ3_M = 27
296297
LLAMA_FTYPE_MOSTLY_IQ2_S = 28
297298
LLAMA_FTYPE_MOSTLY_IQ2_M = 29
299+
LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
298300
LLAMA_FTYPE_GUESSED = 1024
299301

300302
# enum llama_rope_scaling_type {
@@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
548550
# float yarn_beta_fast; // YaRN low correction dim
549551
# float yarn_beta_slow; // YaRN high correction dim
550552
# uint32_t yarn_orig_ctx; // YaRN original context size
553+
# float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
551554

552555
# ggml_backend_sched_eval_callback cb_eval;
553556
# void * cb_eval_user_data;
@@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
580583
yarn_beta_fast (float): YaRN low correction dim
581584
yarn_beta_slow (float): YaRN high correction dim
582585
yarn_orig_ctx (int): YaRN original context size
586+
defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
583587
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
584588
cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
585589
type_k (int): data type for K cache
@@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
605609
("yarn_beta_fast", ctypes.c_float),
606610
("yarn_beta_slow", ctypes.c_float),
607611
("yarn_orig_ctx", ctypes.c_uint32),
612+
("defrag_thold", ctypes.c_float),
608613
("cb_eval", ggml_backend_sched_eval_callback),
609614
("cb_eval_user_data", ctypes.c_void_p),
610615
("type_k", ctypes.c_int),

vendor/llama.cpp

0 commit comments

Comments
 (0)
0