Update llama.cpp · sjanaX01/llama-cpp-python@359ae73 · GitHub
[go: up one dir, main page]

Skip to content

Commit 359ae73

Browse files
committed
Update llama.cpp
1 parent 7c898d5 commit 359ae73

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ class llama_context_params(Structure):
526526
# bool quantize_output_tensor; // quantize output.weight
527527
# bool only_copy;         // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
528528
# bool pure; // disable k-quant mixtures and quantize all tensors to the same type
529+
# void * imatrix; // pointer to importance matrix data
529530
# } llama_model_quantize_params;
530531
class llama_model_quantize_params(Structure):
531532
"""Parameters for llama_model_quantize
@@ -537,6 +538,7 @@ class llama_model_quantize_params(Structure):
537538
quantize_output_tensor (bool): quantize output.weight
538539
only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
539540
pure (bool): disable k-quant mixtures and quantize all tensors to the same type
541+
imatrix (ctypes.c_void_p): pointer to importance matrix data
540542
"""
541543

542544
_fields_ = [
@@ -545,6 +547,8 @@ class llama_model_quantize_params(Structure):
545547
("allow_requantize", c_bool),
546548
("quantize_output_tensor", c_bool),
547549
("only_copy", c_bool),
550+
("pure", c_bool),
551+
("imatrix", c_void_p),
548552
]
549553

550554

vendor/llama.cpp

0 commit comments

Comments (0)