@@ -371,8 +371,8 @@ def __init__(
             sorted=sorted,
         )
         self._candidates = candidates
-        self._token_nl = Llama.token_nl()
-        self._token_eos = Llama.token_eos()
+        self._token_nl = self.token_nl()
+        self._token_eos = self.token_eos()
         self._candidates_data_id = np.arange(self._n_vocab, dtype=np.intc)  # type: ignore
         self._candidates_data_p = np.zeros(self._n_vocab, dtype=np.single)
 
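This hunk only changes the call sites: the cached newline and EOS token IDs are now fetched through instance methods (redefined in the third hunk below) because the underlying lookups need the loaded context. A minimal, self-contained sketch of why a staticmethod no longer works here, using hypothetical stand-in classes rather than the real llama_cpp bindings:

```python
# Hypothetical stand-ins, not the llama_cpp API: the point is that the
# special-token IDs now live on per-context state, so only an instance
# method that can reach self.ctx can return them.

class FakeContext:
    """Stands in for the llama.cpp context that owns the vocabulary."""
    def __init__(self, nl_id: int, eos_id: int):
        self.nl_id = nl_id
        self.eos_id = eos_id

class MiniLlama:
    def __init__(self, ctx: FakeContext):
        self.ctx = ctx
        # Cache the IDs once at construction, as the diff above does.
        self._token_nl = self.token_nl()
        self._token_eos = self.token_eos()

    def token_nl(self) -> int:
        assert self.ctx is not None
        return self.ctx.nl_id

    def token_eos(self) -> int:
        assert self.ctx is not None
        return self.ctx.eos_id

m = MiniLlama(FakeContext(nl_id=13, eos_id=2))
assert (m._token_nl, m._token_eos) == (13, 2)
```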
@@ -450,10 +450,14 @@ def detokenize(self, tokens: List[int]) -> bytes:
         """
         assert self.ctx is not None
         output = b""
+        buffer_size = 32
+        buffer = (ctypes.c_char * buffer_size)()
         for token in tokens:
-            output += llama_cpp.llama_token_to_str(
-                self.ctx, llama_cpp.llama_token(token)
+            n = llama_cpp.llama_token_to_str(
+                self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
             )
+            assert n <= buffer_size
+            output += bytes(buffer[:n])
         return output
 
     def set_cache(self, cache: Optional[BaseLlamaCache]):
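`detokenize()` previously relied on `llama_token_to_str` returning the token's bytes directly; the updated binding instead writes into a caller-allocated buffer and returns the number of bytes written. A self-contained sketch of that fill-buffer pattern (`fake_token_to_str` and its token table are stand-ins, since the real binding needs a loaded model):

```python
import ctypes

# Stand-in for llama_cpp.llama_token_to_str: writes the token's bytes
# into `buf` and returns how many bytes were written. PIECES is a
# hypothetical token table; the real binding reads the model's vocabulary.
PIECES = {1: b"Hello", 2: b",", 3: b" world"}

def fake_token_to_str(token: int, buf: ctypes.Array, buf_size: int) -> int:
    piece = PIECES[token]
    n = min(len(piece), buf_size)
    ctypes.memmove(buf, piece, n)
    return n

buffer_size = 32
buffer = (ctypes.c_char * buffer_size)()  # one buffer, reused across tokens
output = b""
for token in (1, 2, 3):
    n = fake_token_to_str(token, buffer, buffer_size)
    assert n <= buffer_size  # fail loudly rather than accept a truncated piece
    output += bytes(buffer[:n])

assert output == b"Hello, world"
```

Reusing a single 32-byte buffer avoids a per-token allocation, and the `assert` makes an oversized token piece fail loudly instead of silently producing corrupted output.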
@@ -1681,20 +1685,20 @@ def tokenizer(self) -> "LlamaTokenizer":
         assert self.ctx is not None
         return LlamaTokenizer(self)
 
-    @staticmethod
-    def token_eos() -> int:
+    def token_eos(self) -> int:
         """Return the end-of-sequence token."""
-        return llama_cpp.llama_token_eos()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_eos(self.ctx)
 
-    @staticmethod
-    def token_bos() -> int:
+    def token_bos(self) -> int:
         """Return the beginning-of-sequence token."""
-        return llama_cpp.llama_token_bos()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_bos(self.ctx)
 
-    @staticmethod
-    def token_nl() -> int:
+    def token_nl(self) -> int:
         """Return the newline token."""
-        return llama_cpp.llama_token_nl()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_nl(self.ctx)
 
     @staticmethod
     def logits_to_logprobs(logits: List[float]) -> List[float]:
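The staticmethods become instance methods because the C-level getters now take the context as an argument. A sketch of the binding shape this hunk assumes (stand-in functions and a fake context, not the real llama_cpp module):

```python
# Stand-ins for the updated C bindings assumed by the hunk above; the
# real functions come from llama_cpp and take a llama context pointer.

class _FakeCtx:
    """Hypothetical context exposing the special-token IDs."""
    eos, bos, nl = 2, 1, 13

def llama_token_eos(ctx: _FakeCtx) -> int:  # was: llama_token_eos()
    return ctx.eos

def llama_token_bos(ctx: _FakeCtx) -> int:  # was: llama_token_bos()
    return ctx.bos

def llama_token_nl(ctx: _FakeCtx) -> int:   # was: llama_token_nl()
    return ctx.nl

ctx = _FakeCtx()
assert (llama_token_eos(ctx), llama_token_bos(ctx), llama_token_nl(ctx)) == (2, 1, 13)
```

Note that this is API-breaking for downstream code: calls written against the class, such as `Llama.token_eos()`, now need an instance, e.g. `llama.token_eos()`.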