Use _with_model variants for tokenization · iamudesharma/llama-cpp-python@48cf43b · GitHub
[go: up one dir, main page]

Skip to content

Commit 48cf43b

Browse files
committed
Use _with_model variants for tokenization
1 parent 80389f7 commit 48cf43b

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

llama_cpp/llama.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -408,11 +408,11 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
408408
Returns:
409409
A list of tokens.
410410
"""
411-
assert self.ctx is not None
411+
assert self.model is not None
412412
n_ctx = self._n_ctx
413413
tokens = (llama_cpp.llama_token * n_ctx)()
414-
n_tokens = llama_cpp.llama_tokenize(
415-
self.ctx,
414+
n_tokens = llama_cpp.llama_tokenize_with_model(
415+
self.model,
416416
text,
417417
tokens,
418418
llama_cpp.c_int(n_ctx),
@@ -421,8 +421,8 @@ def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
421421
if n_tokens < 0:
422422
n_tokens = abs(n_tokens)
423423
tokens = (llama_cpp.llama_token * n_tokens)()
424-
n_tokens = llama_cpp.llama_tokenize(
425-
self.ctx,
424+
n_tokens = llama_cpp.llama_tokenize_with_model(
425+
self.model,
426426
text,
427427
tokens,
428428
llama_cpp.c_int(n_tokens),
@@ -443,15 +443,15 @@ def detokenize(self, tokens: List[int]) -> bytes:
443443
Returns:
444444
The detokenized string.
445445
"""
446-
assert self.ctx is not None
446+
assert self.model is not None
447447
output = b""
448-
buffer_size = 8
449-
buffer = (ctypes.c_char * buffer_size)()
448+
size = 8
449+
buffer = (ctypes.c_char * size)()
450450
for token in tokens:
451-
n = llama_cpp.llama_token_to_str(
452-
self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
451+
n = llama_cpp.llama_token_to_str_with_model(
452+
self.model, llama_cpp.llama_token(token), buffer, size
453453
)
454-
assert n <= buffer_size
454+
assert n <= size
455455
output += bytes(buffer[:n])
456456
# NOTE: Llama1 models automatically added a space at the start of the prompt
457457
# this line removes a leading space if the first token is a beginning of sentence token

0 commit comments

Comments (0)