Merge branch 'main' into patch-1 · Tatrabbit/llama-cpp-python@6e1a73b · GitHub
Commit 6e1a73b

authored
Merge branch 'main' into patch-1
2 parents 4100bde + 4887973 · commit 6e1a73b

File tree

4 files changed: +28 −33 lines

4 files changed

+28
-33
lines changed

llama_cpp/llama.py

Lines changed: 3 additions & 3 deletions
@@ -445,17 +445,17 @@ def detokenize(self, tokens: List[int]) -> bytes:
         """
         assert self.model is not None
         output = b""
-        size = 16
+        size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
-            n = llama_cpp.llama_token_to_str_with_model(
+            n = llama_cpp.llama_token_to_piece_with_model(
                 self.model, llama_cpp.llama_token(token), buffer, size
             )
             assert n <= size
             output += bytes(buffer[:n])
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
-        return output
+        return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
 
     def set_cache(self, cache: Optional[BaseLlamaCache]):
         """Set the cache.

llama_cpp/llama_cpp.py

Lines changed: 21 additions & 26 deletions
@@ -973,48 +973,43 @@ def llama_tokenize_with_model(
 _lib.llama_tokenize_with_model.restype = c_int
 
 
-# // Token Id -> String. Uses the vocabulary in the provided context
-# // Does not write null terminator to the buffer
-# LLAMA_API int llama_token_to_str(
+# // Token Id -> Piece.
+# // Uses the vocabulary in the provided context.
+# // Does not write null terminator to the buffer.
+# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# LLAMA_API int llama_token_to_piece(
 #           const struct llama_context * ctx,
-#                         llama_token   token,
-#                                char * buf,
-#                                int    length);
-def llama_token_to_str(
+#                          llama_token   token,
+#                                 char * buf,
+#                                 int    length);
+def llama_token_to_piece(
     ctx: llama_context_p, token: llama_token, buf: bytes, length: c_int
 ) -> int:
-    return _lib.llama_token_to_str(ctx, token, buf, length)
+    return _lib.llama_token_to_piece(ctx, token, buf, length)
 
 
-_lib.llama_tokenize_with_model.argtypes = [
-    llama_model_p,
-    c_char_p,
-    llama_token_p,
-    c_int,
-    c_bool,
-]
-_lib.llama_tokenize_with_model.restype = c_int
+_lib.llama_token_to_piece.argtypes = [llama_context_p, llama_token, c_char_p, c_int]
+_lib.llama_token_to_piece.restype = c_int
 
 
-# LLAMA_API int llama_token_to_str_with_model(
-#                   const struct llama_model * model,
-#                                llama_token   token,
-#                                       char * buf,
-#                                       int    length);
-def llama_token_to_str_with_model(
+# LLAMA_API int llama_token_to_piece_with_model(
+#                   const struct llama_model * model,
+#                                llama_token   token,
+#                                       char * buf,
+#                                       int    length);
+def llama_token_to_piece_with_model(
     model: llama_model_p, token: llama_token, buf: bytes, length: c_int
 ) -> int:
-    return _lib.llama_token_to_str_with_model(model, token, buf, length)
+    return _lib.llama_token_to_piece_with_model(model, token, buf, length)
 
 
-_lib.llama_token_to_str_with_model.argtypes = [
+_lib.llama_token_to_piece_with_model.argtypes = [
     llama_model_p,
     llama_token,
     c_char_p,
     c_int,
 ]
-_lib.llama_token_to_str_with_model.restype = c_int
-
+_lib.llama_token_to_piece_with_model.restype = c_int
 
 # //
 # // Grammar
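These bindings track the upstream llama.cpp rename of llama_token_to_str to llama_token_to_piece. A sketch of calling the model-level binding with a fixed-size buffer, mirroring the detokenize loop in llama.py above; it assumes `model` is an already-loaded llama_model_p handle and that 32 bytes is enough for a single piece:

    import ctypes
    import llama_cpp

    size = 32
    buf = (ctypes.c_char * size)()
    n = llama_cpp.llama_token_to_piece_with_model(
        model, llama_cpp.llama_token(15043), buf, size
    )
    piece = bytes(buf[:n])  # pieces from a SentencePiece vocab typically start with a space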

tests/test_llama.py

Lines changed: 3 additions & 3 deletions
@@ -14,16 +14,16 @@ def test_llama_cpp_tokenization():
 
     tokens = llama.tokenize(text)
     assert tokens[0] == llama.token_bos()
-    assert tokens == [1, 10994, 2787]
+    assert tokens == [1, 15043, 2787]
     detokenized = llama.detokenize(tokens)
     assert detokenized == text
 
     tokens = llama.tokenize(text, add_bos=False)
     assert tokens[0] != llama.token_bos()
-    assert tokens == [10994, 2787]
+    assert tokens == [15043, 2787]
 
     detokenized = llama.detokenize(tokens)
-    assert detokenized == text
+    assert detokenized != text
 
 
 @pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
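The updated expectations follow from the new detokenize behaviour: the leading space is stripped only when the first token is BOS, so the round trip without add_bos no longer reproduces the input exactly. The token values below are copied from the diff; the exact non-BOS output is an inference, not something the test asserts:

    text = b"Hello World"

    with_bos = [1, 15043, 2787]     # llama.tokenize(text)
    without_bos = [15043, 2787]     # llama.tokenize(text, add_bos=False)

    # llama.detokenize(with_bos)    == text
    # llama.detokenize(without_bos) != text  (likely b" Hello World", leading space kept)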

vendor/llama.cpp

0 commit comments
