Update llama.cpp · MobinX/llama-cpp-python@896ab7b
Commit 896ab7b

Update llama.cpp
1 parent 7bb0024 commit 896ab7b

2 files changed: +50 −7 lines

llama_cpp/llama_cpp.py

Lines changed: 49 additions & 6 deletions
@@ -326,13 +326,23 @@ def llama_mlock_supported() -> bool:
 # // Initialize the llama + ggml backend
 # // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
-# LLAMA_API void llama_init_backend(bool numa);
-def llama_init_backend(numa: c_bool):
-    return _lib.llama_init_backend(numa)
+# LLAMA_API void llama_backend_init(bool numa);
+def llama_backend_init(numa: c_bool):
+    return _lib.llama_backend_init(numa)
 
 
-_lib.llama_init_backend.argtypes = [c_bool]
-_lib.llama_init_backend.restype = None
+_lib.llama_backend_init.argtypes = [c_bool]
+_lib.llama_backend_init.restype = None
+
+
+# // Call once at the end of the program - currently only used for MPI
+# LLAMA_API void llama_backend_free();
+def llama_backend_free():
+    return _lib.llama_backend_free()
+
+
+_lib.llama_backend_free.argtypes = []
+_lib.llama_backend_free.restype = None
 
 
 # LLAMA_API struct llama_model * llama_load_model_from_file(
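This hunk tracks the upstream rename of llama_init_backend to llama_backend_init and exposes the new llama_backend_free teardown call. A minimal sketch of the intended lifecycle, assuming the bindings in this diff are importable as llama_cpp.llama_cpp; note the module also initializes the backend automatically on import (see the last hunk below):

from ctypes import c_bool

import llama_cpp.llama_cpp as llama_cpp

# Initialize the ggml/llama backend once at program start; pass c_bool(True)
# to opt in to NUMA optimizations (the module itself defaults to False on import).
llama_cpp.llama_backend_init(c_bool(False))

try:
    # ... load a model with llama_load_model_from_file, create contexts,
    # run inference ...
    pass
finally:
    # New in this revision: matching teardown at program exit
    # (currently only meaningful for MPI builds, per the upstream comment).
    llama_cpp.llama_backend_free()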
@@ -819,6 +829,39 @@ def llama_sample_frequency_and_presence_penalties(
 _lib.llama_sample_frequency_and_presence_penalties.restype = None
 
 
+# /// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806
+# /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
+# /// @params guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
+# /// @params scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
+# /// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits.
+# LLAMA_API void llama_sample_classifier_free_guidance(
+#     struct llama_context * ctx,
+#     llama_token_data_array * candidates,
+#     struct llama_context * guidance_ctx,
+#     float scale,
+#     float smooth_factor);
+def llama_sample_classifier_free_guidance(
+    ctx: llama_context_p,
+    candidates,  # type: _Pointer[llama_token_data_array]
+    guidance_ctx: llama_context_p,
+    scale: c_float,
+    smooth_factor: c_float,
+):
+    return _lib.llama_sample_classifier_free_guidance(
+        ctx, candidates, guidance_ctx, scale, smooth_factor
+    )
+
+
+_lib.llama_sample_classifier_free_guidance.argtypes = [
+    llama_context_p,
+    llama_token_data_array_p,
+    llama_context_p,
+    c_float,
+    c_float,
+]
+_lib.llama_sample_classifier_free_guidance.restype = None
+
+
 # @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
 # LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);
 def llama_sample_softmax(
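The new binding mirrors llama.cpp's classifier-free guidance sampler: it blends logits from a second "guidance" context (evaluated on a negative prompt plus the same generated tokens) into the main context's unsorted candidate logits before any other sampler runs. A hedged sketch of one way to drive it, assuming ctx and guidance_ctx are llama_context_p handles created elsewhere from the same model; the helper name sample_with_cfg is hypothetical and not part of this commit:

import ctypes

import llama_cpp.llama_cpp as llama_cpp


def sample_with_cfg(ctx, guidance_ctx, scale=1.5, smooth_factor=1.0):
    n_vocab = llama_cpp.llama_n_vocab(ctx)
    logits = llama_cpp.llama_get_logits(ctx)

    # Build an *unsorted* candidates array directly from the main context's
    # logits, as the doc comment for the binding requires.
    data = (llama_cpp.llama_token_data * n_vocab)(
        *[llama_cpp.llama_token_data(id=i, logit=logits[i], p=0.0) for i in range(n_vocab)]
    )
    candidates = llama_cpp.llama_token_data_array(
        data=ctypes.cast(data, ctypes.POINTER(llama_cpp.llama_token_data)),
        size=n_vocab,
        sorted=False,
    )

    # Mix the guidance context's logits into `candidates` in place;
    # scale=1.0 would leave the logits effectively unguided.
    llama_cpp.llama_sample_classifier_free_guidance(
        ctx,
        ctypes.byref(candidates),
        guidance_ctx,
        ctypes.c_float(scale),
        ctypes.c_float(smooth_factor),
    )

    # Hand the adjusted candidates to any existing sampler, e.g. greedy.
    return llama_cpp.llama_sample_token_greedy(ctx, ctypes.byref(candidates))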
@@ -1063,5 +1106,5 @@ def llama_print_system_info() -> bytes:
 _llama_initialized = False
 
 if not _llama_initialized:
-    llama_init_backend(c_bool(False))
+    llama_backend_init(c_bool(False))
     _llama_initialized = True
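Because the module still initializes the backend automatically on import (with NUMA disabled), code that only imports llama_cpp is unaffected by the rename; code that called the old symbol directly must switch to the new name. A quick before/after sketch:

from ctypes import c_bool

import llama_cpp.llama_cpp as llama_cpp

# llama_cpp.llama_init_backend(c_bool(False))  # old name: AttributeError after this commit
llama_cpp.llama_backend_init(c_bool(False))    # renamed entry point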

vendor/llama.cpp (submodule pointer updated: 1 addition & 1 deletion)
