Track generated tokens internally · coderonion/llama-cpp-python@ac7068a · GitHub

Commit ac7068a

Track generated tokens internally
1 parent 25b646c commit ac7068a

File tree

1 file changed: +3 −0 lines changed

llama_cpp/llama.py

Lines changed: 3 additions & 0 deletions
@@ -76,6 +76,7 @@ def __init__(
             maxlen=self.last_n_tokens_size,
         )
         self.tokens_consumed = 0
+        self.tokens: List[llama_cpp.llama_token] = []
         self.n_batch = min(n_ctx, n_batch)
         self.n_tokens = 0
         self.n_past = 0
@@ -140,6 +141,7 @@ def reset(self):
             [llama_cpp.llama_token(0)] * self.last_n_tokens_size
         )
         self.tokens_consumed = 0
+        self.tokens.clear()
         self.n_tokens = 0
         self.n_past = 0
         self.all_logits = []
@@ -165,6 +167,7 @@ def eval(self, tokens: Sequence[llama_cpp.llama_token]):
         )
         if int(return_code) != 0:
             raise RuntimeError(f"llama_eval returned {return_code}")
+        self.tokens.extend(batch)
         self.last_n_tokens_data.extend(batch)
         self.tokens_consumed += len(batch)
         if self.params.logits_all:
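The commit adds a self.tokens list that accumulates every token batch passed through eval() and is emptied by reset(), so the wrapper keeps the full token history internally rather than only the last-N window. The following is a minimal sketch of that bookkeeping pattern, not the actual llama-cpp-python class: the TokenTracker name and plain-int tokens are stand-ins, and the llama_eval call is omitted.

    from collections import deque
    from typing import Deque, List, Sequence

    # Hypothetical stand-in for llama_cpp.llama_token.
    Token = int

    class TokenTracker:
        """Sketch of the state updates shown in the diff above."""

        def __init__(self, last_n_tokens_size: int = 64) -> None:
            self.last_n_tokens_size = last_n_tokens_size
            # Sliding window of the most recent tokens (pre-existing state).
            self.last_n_tokens_data: Deque[Token] = deque(
                [0] * last_n_tokens_size, maxlen=last_n_tokens_size
            )
            self.tokens_consumed = 0
            # Full token history -- the list introduced by this commit.
            self.tokens: List[Token] = []

        def reset(self) -> None:
            self.last_n_tokens_data.extend([0] * self.last_n_tokens_size)
            self.tokens_consumed = 0
            self.tokens.clear()  # drop the history on reset

        def eval(self, batch: Sequence[Token]) -> None:
            # The real eval() runs llama_eval first; here we only mirror
            # the bookkeeping that follows a successful call.
            self.tokens.extend(batch)
            self.last_n_tokens_data.extend(batch)
            self.tokens_consumed += len(batch)

    tracker = TokenTracker()
    tracker.eval([1, 2, 3])
    assert tracker.tokens == [1, 2, 3]
    tracker.reset()
    assert tracker.tokens == []

Keeping the history in a plain list (rather than only the bounded deque) lets later code recover everything generated in the current session, at the cost of memory growing with the number of evaluated tokens until reset() is called.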

0 commit comments

Comments
 (0)
0