feat: Update llama.cpp · iamlemec/llama-cpp-python@159cc4e · GitHub

Commit 159cc4e

feat: Update llama.cpp
1 parent 893a27a commit 159cc4e

File tree

3 files changed: +33 -11 lines changed


llama_cpp/_internals.py

Lines changed: 7 additions & 7 deletions
@@ -181,20 +181,20 @@ def tokenize(self, text: bytes, add_bos: bool, special: bool):
         )
         return list(tokens[:n_tokens])
 
-    def token_to_piece(self, token: int) -> bytes:
+    def token_to_piece(self, token: int, special: bool = False) -> bytes:
         assert self.model is not None
         buf = ctypes.create_string_buffer(32)
-        llama_cpp.llama_token_to_piece(self.model, token, buf, 32)
+        llama_cpp.llama_token_to_piece(self.model, token, buf, 32, special)
         return bytes(buf)
 
-    def detokenize(self, tokens: List[int]) -> bytes:
+    def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
         assert self.model is not None
         output = b""
         size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
             n = llama_cpp.llama_token_to_piece(
-                self.model, llama_cpp.llama_token(token), buffer, size
+                self.model, llama_cpp.llama_token(token), buffer, size, special
             )
             assert n <= size
             output += bytes(buffer[:n])

@@ -597,13 +597,13 @@ def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> li
     return list(result)
 
 
-def _token_to_piece(model: _LlamaModel, token: int) -> str:
+def _token_to_piece(model: _LlamaModel, token: int, special: bool = False) -> str:
     assert model.model is not None
     result = (ctypes.c_char * 8)(0)
-    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
     if n_tokens < 0:
         result = (ctypes.c_char * -n_tokens)(0)
-        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
         if check != -n_tokens:
             raise RuntimeError(f"Failed to get piece: token={token}")
     else:
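
Not part of this commit's diff, just a minimal usage sketch of the new special flag on the internal wrapper patched above. The _LlamaModel constructor arguments (path_model, params), the placeholder model path, and the exact rendered output are assumptions; only the special= keyword on detokenize comes from this change.

import llama_cpp
from llama_cpp._internals import _LlamaModel

# Assumed constructor; the path is a placeholder.
model = _LlamaModel(
    path_model="./models/model.gguf",
    params=llama_cpp.llama_model_default_params(),
)

eos = llama_cpp.llama_token_eos(model.model)

# Default (special=False): unchanged behaviour; control tokens such as EOS
# are typically rendered as empty pieces.
print(model.detokenize([eos]))

# New in this commit: special=True is forwarded to llama_token_to_piece,
# so the token's text (e.g. b"</s>") appears in the output.
print(model.detokenize([eos], special=True))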

llama_cpp/llama_cpp.py

Lines changed: 25 additions & 3 deletions
@@ -2380,6 +2380,18 @@ def llama_token_get_type(
     ...
 
 
+# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
+# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_eog(
+    model: llama_model_p, token: Union[llama_token, int], /
+) -> bool:
+    """Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)"""
+    ...
+
+
 # // Special tokens
 
 
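As a hedged illustration (not part of this commit's diff), the new llama_token_is_eog binding can replace a hard-coded EOS comparison when deciding whether a generation loop should stop. The model-loading calls, the placeholder path, and the stand-in token id are assumptions for the sketch.

import llama_cpp

params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./models/model.gguf", params)  # placeholder path

# Stand-in for a freshly sampled token id; here the EOS id itself.
token_id = llama_cpp.llama_token_eos(model)

# llama_token_is_eog matches EOT and other end-of-generation tokens,
# not only EOS, so it is a more general stop check.
if llama_cpp.llama_token_is_eog(model, token_id):
    print("stop generation")

llama_cpp.llama_free_model(model)
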
@@ -2434,7 +2446,7 @@ def llama_add_eos_token(model: llama_model_p, /) -> int:
     ...
 
 
-# // codellama infill tokens
+# // Codellama infill tokens
 # LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
 @ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token)
 def llama_token_prefix(model: llama_model_p) -> int:
@@ -2524,18 +2536,21 @@ def llama_tokenize(
 # // Uses the vocabulary in the provided context.
 # // Does not write null terminator to the buffer.
 # // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# // @param special If true, special tokens are rendered in the output.
 # LLAMA_API int32_t llama_token_to_piece(
 #     const struct llama_model * model,
 #     llama_token token,
 #     char * buf,
-#     int32_t length);
+#     int32_t length,
+#     bool special);
 @ctypes_function(
     "llama_token_to_piece",
     [
         llama_model_p_ctypes,
         llama_token,
         ctypes.c_char_p,
         ctypes.c_int32,
+        ctypes.c_bool,
     ],
     ctypes.c_int32,
 )
@@ -2544,13 +2559,20 @@ def llama_token_to_piece(
     token: Union[llama_token, int],
     buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
     length: Union[ctypes.c_int, int],
+    special: Union[ctypes.c_bool, bool],
     /,
 ) -> int:
     """Token Id -> Piece.
     Uses the vocabulary in the provided context.
     Does not write null terminator to the buffer.
     User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
-    """
+
+    Args:
+        model: The model to use for tokenization.
+        token: The token to convert.
+        buf: The buffer to write the token to.
+        length: The length of the buffer.
+        special: If true, special tokens are rendered in the output."""
     ...
 
 
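A small sketch of calling the updated binding directly with the new special argument; the call signature matches the binding defined above, while the model-loading code and placeholder path are assumptions, not part of this commit.

import ctypes
import llama_cpp

params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./models/model.gguf", params)  # placeholder path

token = llama_cpp.llama_token_eos(model)
buf = ctypes.create_string_buffer(32)

# With special=True, llama.cpp renders special tokens (e.g. EOS) as text.
n = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), True)
print(buf.raw[:n])

llama_cpp.llama_free_model(model)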

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer updated to the new llama.cpp commit)

0 commit comments
