feat: Update llama.cpp · asusevski/llama-cpp-python@7e20e34 · GitHub

Commit 7e20e34

feat: Update llama.cpp

1 parent 01bddd6 commit 7e20e34

File tree

2 files changed: +35 -1 lines changed

llama_cpp/llama_cpp.py

Lines changed: 34 additions & 0 deletions
@@ -1444,6 +1444,24 @@ def llama_get_model_tensor(
     ...


+# // Returns true if the model contains an encoder that requires llama_encode() call
+# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_encoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains an encoder that requires llama_encode() call"""
+    ...
+
+
+# // For encoder-decoder models, this function returns id of the token that must be provided
+# // to the decoder to start generating output sequence. For other models, it returns -1.
+# LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+@ctypes_function("llama_model_decoder_start_token", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
+    """For encoder-decoder models, this function returns id of the token that must be provided
+    to the decoder to start generating output sequence. For other models, it returns -1."""
+    ...
+
+
 # // Returns 0 on success
 # LLAMA_API uint32_t llama_model_quantize(
 #         const char * fname_inp,
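
For context, here is a rough usage sketch (not part of this diff) of how the two new bindings above might be called through llama_cpp's low-level API to probe an encoder-decoder model such as T5. The model path is hypothetical and error handling is omitted.

# Editorial usage sketch, not part of this commit. The GGUF path is a
# hypothetical placeholder; the other calls are existing low-level bindings.
import llama_cpp

llama_cpp.llama_backend_init()

params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"/path/to/t5.gguf", params)  # hypothetical path

if llama_cpp.llama_model_has_encoder(model):
    # Encoder-decoder model: decoding must start from this token after llama_encode().
    start_token = llama_cpp.llama_model_decoder_start_token(model)
    print("decoder start token id:", start_token)
else:
    print("decoder-only model; llama_encode() is not needed")

llama_cpp.llama_free_model(model)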
@@ -2271,6 +2289,22 @@ def llama_batch_free(batch: llama_batch, /):
     ...


+# // Processes a batch of tokens with the encoder part of the encoder-decoder model.
+# // Stores the encoder output internally for later use by the decoder cross-attention layers.
+# //   0 - success
+# // < 0 - error
+# LLAMA_API int32_t llama_encode(
+#           struct llama_context * ctx,
+#             struct llama_batch   batch);
+@ctypes_function("llama_encode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32)
+def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
+    """Processes a batch of tokens with the encoder part of the encoder-decoder model.
+    Stores the encoder output internally for later use by the decoder cross-attention layers.
+      0 - success
+    < 0 - error"""
+    ...
+
+
 # // Positive return values does not mean a fatal error, but rather a warning.
 # // 0 - success
 # // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
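
A matching sketch (again, not in this diff) of the encode-then-decode call order that llama_encode enables, continuing from the model loaded in the sketch above. The prompt text, batch construction, and the llama_batch_get_one signature are assumptions about the API of this period; treat it as an illustration of call ordering rather than a drop-in snippet.

# Editorial sketch of the encode -> decode ordering, not part of this commit.
# Continues from the previous sketch (`model` is already loaded). The
# llama_batch_get_one() signature shown here may differ between versions.
import llama_cpp

ctx_params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# Tokenize the source prompt into a ctypes llama_token array.
text = b"translate English to German: Hello"  # hypothetical prompt
buf = (llama_cpp.llama_token * 64)()
n_tokens = llama_cpp.llama_tokenize(model, text, len(text), buf, 64, True, True)

# Run the encoder once over the whole prompt; the output is cached in the
# context for the decoder's cross-attention layers.
enc_batch = llama_cpp.llama_batch_get_one(buf, n_tokens, 0, 0)
if llama_cpp.llama_encode(ctx, enc_batch) != 0:
    raise RuntimeError("llama_encode failed")

# Seed the decoder with the decoder start token and decode as usual.
start = (llama_cpp.llama_token * 1)(llama_cpp.llama_model_decoder_start_token(model))
dec_batch = llama_cpp.llama_batch_get_one(start, 1, 0, 0)
if llama_cpp.llama_decode(ctx, dec_batch) != 0:
    raise RuntimeError("llama_decode failed")

llama_cpp.llama_free(ctx)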

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer updated to the newer llama.cpp revision)

0 commit comments