feat: Update llama.cpp · rob-field/llama-cpp-python@2bc1d97 · GitHub

Commit 2bc1d97

feat: Update llama.cpp
1 parent 803924b commit 2bc1d97

File tree

3 files changed: +15 -46 lines

llama_cpp/_internals.py

Lines changed: 0 additions & 5 deletions
@@ -805,15 +805,10 @@ def add_penalties(
         ignore_eos: bool,
     ):
         sampler = llama_cpp.llama_sampler_init_penalties(
-            n_vocab,
-            special_eos_id,
-            linefeed_id,
             penalty_last_n,
             penalty_repeat,
             penalty_freq,
             penalty_present,
-            penalize_nl,
-            ignore_eos,
         )
         self._add_sampler(sampler)

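For callers that invoke the binding directly, the update means the vocab, EOS/newline token and penalize_nl/ignore_eos arguments are no longer passed. A minimal usage sketch of the reduced call (argument values are illustrative, not from this commit):

import llama_cpp

# Build a standalone penalties sampler with the reduced four-argument signature.
sampler = llama_cpp.llama_sampler_init_penalties(
    64,    # penalty_last_n: penalize the last 64 tokens (0 = disable, -1 = context size)
    1.1,   # penalty_repeat: 1.0 = disabled
    0.0,   # penalty_freq: 0.0 = disabled
    0.0,   # penalty_present: 0.0 = disabled
)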

llama_cpp/llama_cpp.py

Lines changed: 14 additions & 40 deletions
@@ -256,13 +256,17 @@
 # // note: these values should be synchronized with ggml_rope
 # // TODO: maybe move this enum to ggml.h (ggml_rope_type)
 # enum llama_rope_type {
-#     LLAMA_ROPE_TYPE_NONE = -1,
-#     LLAMA_ROPE_TYPE_NORM = 0,
-#     LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_NONE   = -1,
+#     LLAMA_ROPE_TYPE_NORM   = 0,
+#     LLAMA_ROPE_TYPE_NEOX   = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_MROPE  = GGML_ROPE_TYPE_MROPE,
+#     LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
 # };
 LLAMA_ROPE_TYPE_NONE = -1
 LLAMA_ROPE_TYPE_NORM = 0
 LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
+LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8
+LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24


 # enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
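As an illustrative aside (not part of this diff), the new constants can be checked against a loaded model. The sketch below assumes a llama_model_p handle and the existing llama_rope_type binding:

import llama_cpp

def describe_rope_type(model: llama_cpp.llama_model_p) -> str:
    # llama_rope_type returns one of the LLAMA_ROPE_TYPE_* values for the model.
    rope = llama_cpp.llama_rope_type(model)
    if rope == llama_cpp.LLAMA_ROPE_TYPE_MROPE:
        return "M-RoPE"
    if rope == llama_cpp.LLAMA_ROPE_TYPE_VISION:
        return "vision RoPE"
    if rope == llama_cpp.LLAMA_ROPE_TYPE_NEOX:
        return "NeoX-style RoPE"
    return "standard or no RoPE"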
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float:
 # // Functions to access the model's GGUF metadata scalar values
 # // - The functions return the length of the string on success, or -1 on failure
 # // - The output string is always null-terminated and cleared on failure
+# // - When retrieving a string, an extra byte must be allocated to account for the null terminator
 # // - GGUF array values are not supported by these functions
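An illustrative sketch of the buffer handling the new comment line describes, assuming the existing llama_model_meta_val_str binding and an already-loaded model handle (not part of this diff):

import ctypes
import llama_cpp

def read_meta_str(model, key: bytes, max_len: int = 256) -> str:
    # Allocate one extra byte beyond the expected value length for the null terminator.
    buf = ctypes.create_string_buffer(max_len + 1)
    n = llama_cpp.llama_model_meta_val_str(model, key, buf, ctypes.sizeof(buf))
    if n < 0:
        raise KeyError(key.decode())  # the functions return -1 on failure
    return buf.value.decode("utf-8")

# Example with a common GGUF key (illustrative):
# name = read_meta_str(model, b"general.name")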

@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int:
     ...


-# // Get a llama model tensor
-# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-@ctypes_function(
-    "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p
-)
-def llama_get_model_tensor(
-    model: llama_model_p, name: Union[ctypes.c_char_p, bytes], /
-) -> ctypes.c_void_p:
-    """Get a llama model tensor"""
-    ...
-
-
 # // Returns true if the model contains an encoder that requires llama_encode() call
 # LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 @ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar(
     ...


+# /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
 # LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
-#     int32_t n_vocab,  // llama_n_vocab()
-#     llama_token special_eos_id,  // llama_token_eos()
-#     llama_token linefeed_id,  // llama_token_nl()
-#     int32_t penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
-#     float penalty_repeat,  // 1.0 = disabled
-#     float penalty_freq,  // 0.0 = disabled
-#     float penalty_present,  // 0.0 = disabled
-#     bool penalize_nl,  // consider newlines as a repeatable token
-#     bool ignore_eos);  // ignore the end-of-sequence token
+#     int32_t penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
+#     float penalty_repeat,  // 1.0 = disabled
+#     float penalty_freq,  // 0.0 = disabled
+#     float penalty_present);  // 0.0 = disabled
 @ctypes_function(
     "llama_sampler_init_penalties",
-    [
-        ctypes.c_int32,
-        llama_token,
-        llama_token,
-        ctypes.c_int32,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_bool,
-        ctypes.c_bool,
-    ],
+    [ctypes.c_int32, ctypes.c_float, ctypes.c_float, ctypes.c_float],
     llama_sampler_p_ctypes,
 )
 def llama_sampler_init_penalties(
-    n_vocab: int,
-    special_eos_id: int,
-    linefeed_id: int,
     penalty_last_n: int,
     penalty_repeat: float,
     penalty_freq: float,
     penalty_present: float,
-    penalize_nl: bool,
-    ignore_eos: bool,
     /,
 ) -> llama_sampler_p:
     ...
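To illustrate the NOTE above: one way to keep the penalty pass off the full vocabulary is to put a top-k (or top-p) sampler ahead of it in a sampler chain. A rough sketch assuming the existing llama_sampler_chain_* and llama_sampler_init_top_k bindings (values are illustrative):

import llama_cpp

# Top-k runs first, so the penalties sampler only scans the surviving 40 candidates.
chain = llama_cpp.llama_sampler_chain_init(llama_cpp.llama_sampler_chain_default_params())
llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_top_k(40))
llama_cpp.llama_sampler_chain_add(
    chain,
    llama_cpp.llama_sampler_init_penalties(
        64,   # penalty_last_n
        1.1,  # penalty_repeat
        0.0,  # penalty_freq
        0.0,  # penalty_present
    ),
)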

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit reference updated)

0 commit comments