Update llama.cpp · ikili/llama-cpp-python@c67f786 · GitHub

Commit c67f786

Update llama.cpp
1 parent e34f441 commit c67f786

File tree

2 files changed: +28 -6 lines changed

llama_cpp/llama_cpp.py

Lines changed: 27 additions & 5 deletions
@@ -290,13 +290,14 @@ def llama_mlock_supported() -> bool:
 
 # // TODO: not great API - very likely to change
 # // Initialize the llama + ggml backend
+# // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
-# LLAMA_API void llama_init_backend();
-def llama_init_backend():
-    return _lib.llama_init_backend()
+# LLAMA_API void llama_init_backend(bool numa);
+def llama_init_backend(numa: c_bool):
+    return _lib.llama_init_backend(numa)
 
 
-_lib.llama_init_backend.argtypes = []
+_lib.llama_init_backend.argtypes = [c_bool]
 _lib.llama_init_backend.restype = None
 
 
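For reference, a minimal usage sketch of the updated binding (illustrative only: the llama_cpp import alias and the choice to enable NUMA are assumptions, and the module itself already initializes the backend with NUMA disabled at import time, as the last hunk below shows):

from ctypes import c_bool
import llama_cpp

# llama_init_backend now takes a bool: True requests NUMA optimizations,
# False keeps the previous single-node behavior.
llama_cpp.llama_init_backend(c_bool(True))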
@@ -565,6 +566,27 @@ def llama_eval(
 _lib.llama_eval.restype = c_int
 
 
+# // Same as llama_eval, but use float matrix input directly.
+# LLAMA_API int llama_eval_embd(
+#         struct llama_context * ctx,
+#            const float * embd,
+#                     int   n_tokens,
+#                     int   n_past,
+#                     int   n_threads);
+def llama_eval_embd(
+    ctx: llama_context_p,
+    embd,  # type: Array[c_float]
+    n_tokens: c_int,
+    n_past: c_int,
+    n_threads: c_int,
+) -> int:
+    return _lib.llama_eval_embd(ctx, embd, n_tokens, n_past, n_threads)
+
+
+_lib.llama_eval_embd.argtypes = [llama_context_p, c_float_p, c_int, c_int, c_int]
+_lib.llama_eval_embd.restype = c_int
+
+
 # Convert the provided text into tokens.
 # The tokens pointer must be large enough to hold the resulting tokens.
 # Returns the number of tokens on success, no more than n_max_tokens
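A rough usage sketch for the new llama_eval_embd binding (assumptions: ctx is a context created elsewhere with the usual llama.cpp context setup, llama_n_embd is used to query the embedding width, and the zero-filled embedding values are placeholders):

from ctypes import c_float, c_int
import llama_cpp

n_tokens = 2                          # number of positions to evaluate
n_embd = llama_cpp.llama_n_embd(ctx)  # embedding width the model expects

# Flatten an n_tokens x n_embd float matrix into a contiguous ctypes array.
values = [0.0] * (n_tokens * n_embd)  # placeholder embedding values
embd = (c_float * len(values))(*values)

# Evaluate the embeddings directly instead of token ids, as llama_eval would.
rc = llama_cpp.llama_eval_embd(ctx, embd, c_int(n_tokens), c_int(0), c_int(4))
assert rc == 0  # 0 indicates success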
@@ -998,5 +1020,5 @@ def llama_print_system_info() -> bytes:
 _llama_initialized = False
 
 if not _llama_initialized:
-    llama_init_backend()
+    llama_init_backend(c_bool(False))
     _llama_initialized = True
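Worth noting: the module-level initialization passes c_bool(False), so importing the package keeps the previous behavior and does not turn NUMA optimizations on; a caller who wants them has to request them explicitly, as sketched after the first hunk above.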

vendor/llama.cpp

0 commit comments

Comments
 (0)
0