@@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
264
264
# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
265
265
# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
266
266
# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
267
+ # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
267
268
268
269
# LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
269
270
# };
@@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
295
296
LLAMA_FTYPE_MOSTLY_IQ3_M = 27
296
297
LLAMA_FTYPE_MOSTLY_IQ2_S = 28
297
298
LLAMA_FTYPE_MOSTLY_IQ2_M = 29
299
+ LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
298
300
LLAMA_FTYPE_GUESSED = 1024
299
301
300
302
# enum llama_rope_scaling_type {
@@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
548
550
# float yarn_beta_fast; // YaRN low correction dim
549
551
# float yarn_beta_slow; // YaRN high correction dim
550
552
# uint32_t yarn_orig_ctx; // YaRN original context size
553
+ # float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
551
554
552
555
# ggml_backend_sched_eval_callback cb_eval;
553
556
# void * cb_eval_user_data;
@@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
580
583
yarn_beta_fast (float): YaRN low correction dim
581
584
yarn_beta_slow (float): YaRN high correction dim
582
585
yarn_orig_ctx (int): YaRN original context size
586
+ defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
583
587
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
584
588
cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
585
589
type_k (int): data type for K cache
@@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
605
609
("yarn_beta_fast" , ctypes .c_float ),
606
610
("yarn_beta_slow" , ctypes .c_float ),
607
611
("yarn_orig_ctx" , ctypes .c_uint32 ),
612
+ ("defrag_thold" , ctypes .c_float ),
608
613
("cb_eval" , ggml_backend_sched_eval_callback ),
609
614
("cb_eval_user_data" , ctypes .c_void_p ),
610
615
("type_k" , ctypes .c_int ),
0 commit comments