Commit 4abb8c9

Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents: 85d3374 + e16f06e

File tree: 2 files changed (+13, -9 lines)

llama_cpp/llama.py

Lines changed: 12 additions & 8 deletions
@@ -950,8 +950,7 @@ def logit_bias_processor(
 
             if stream:
                 remaining_tokens = completion_tokens[returned_tokens:]
-                prev_tokens = completion_tokens[:returned_tokens]
-                remaining_text = self.detokenize(completion_tokens, prev_tokens)
+                remaining_text = self.detokenize(remaining_tokens)
                 remaining_length = len(remaining_text)
 
                 # We want to avoid yielding any characters from
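
The first hunk switches the streaming path back to detokenizing only the
un-yielded suffix, remaining_tokens, in place of the removed two-argument
call that also received the already-returned prefix. A minimal sketch of
the two behaviours, assuming the prefix-aware form amounts to decoding
everything and slicing off the prefix's bytes (detok is a toy stand-in,
not the library's Llama.detokenize):

    def suffix_via_prefix(detok, tokens, n_returned):
        # Removed variant: decode everything, then slice off the bytes
        # already attributed to the yielded prefix.
        full = detok(tokens)
        prev = detok(tokens[:n_returned])
        return full[len(prev):]

    def suffix_direct(detok, tokens, n_returned):
        # Variant restored by this merge: decode only the pending suffix.
        return detok(tokens[n_returned:])

    # Toy tokenizer: each "token" is a raw byte string.
    detok = lambda ts: b"".join(ts)
    assert suffix_via_prefix(detok, [b"he", b"llo"], 1) == b"llo"
    assert suffix_direct(detok, [b"he", b"llo"], 1) == b"llo"
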
@@ -973,13 +972,13 @@ def logit_bias_processor(
                     for token in remaining_tokens:
                         if token == self.token_bos():
                             continue
-                        token_end_position += len(remaining_text)
+                        token_end_position += len(self.detokenize([token]))
                         # Check if stop sequence is in the token
                         if token_end_position > (
                             remaining_length - first_stop_position
                         ):
                             break
-                        token_str = remaining_text.decode(
+                        token_str = self.detokenize([token]).decode(
                             "utf-8", errors="ignore"
                         )
                         text_offset = len(prompt) + len(
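
The second hunk restores per-token offset accounting: the removed line
advanced token_end_position by the length of the whole remaining text on
every iteration, while the restored line advances it by the detokenized
length of the current token alone. A hedged sketch of the restored
accounting (detok, bos_token, and the argument names are illustrative,
not the library's API):

    def tokens_before_stop(detok, remaining_tokens, remaining_length,
                           first_stop_position, bos_token):
        # Walk pending tokens, advancing the end offset by each token's
        # own byte length, and stop before overlapping the portion of
        # the text already matched against a stop sequence.
        safe = []
        token_end_position = 0
        for token in remaining_tokens:
            if token == bos_token:
                continue
            token_end_position += len(detok([token]))
            if token_end_position > (remaining_length - first_stop_position):
                break
            safe.append(detok([token]).decode("utf-8", errors="ignore"))
        return safe
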
@@ -1004,7 +1003,11 @@ def logit_bias_processor(
                         }
                         top_logprob.update({token_str: current_logprobs[int(token)]})
                         logprobs_or_none = {
-                            "tokens": [token_str],
+                            "tokens": [
+                                self.detokenize([token]).decode(
+                                    "utf-8", errors="ignore"
+                                )
+                            ],
                             "text_offset": [text_offset],
                             "token_logprobs": [current_logprobs[int(token)]],
                             "top_logprobs": [top_logprob],
@@ -1017,7 +1020,9 @@ def logit_bias_processor(
                         "model": model_name,
                         "choices": [
                             {
-                                "text": token_str,
+                                "text": self.detokenize([token]).decode(
+                                    "utf-8", errors="ignore"
+                                ),
                                 "index": 0,
                                 "logprobs": logprobs_or_none,
                                 "finish_reason": None,
@@ -1029,7 +1034,7 @@ def logit_bias_processor(
                 decode_success = False
                 for i in range(1, len(remaining_tokens) + 1):
                     try:
-                        bs = remaining_text
+                        bs = self.detokenize(remaining_tokens[:i])
                         ts = bs.decode("utf-8")
                         decode_success = True
                         break
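
The loop above recovers from partially decodable byte sequences: a
multi-byte character can be split across tokens, so the bytes for a short
prefix of remaining_tokens may not decode yet, and the restored line
retries on growing prefixes until one does. A standalone sketch of the
same idea (detok is a toy stand-in for the tokenizer, not the library's
API):

    def first_decodable_prefix(detok, remaining_tokens):
        # Try growing prefixes of the pending tokens until their bytes
        # form valid UTF-8; return the decoded text and the number of
        # tokens consumed, or (None, 0) if nothing decodes yet.
        for i in range(1, len(remaining_tokens) + 1):
            try:
                return detok(remaining_tokens[:i]).decode("utf-8"), i
            except UnicodeError:
                continue
        return None, 0

    # The first byte of "é" (b"\xc3") cannot decode alone, so the
    # prefix grows to two tokens before decoding succeeds.
    detok = lambda ts: b"".join(ts)
    assert first_decodable_prefix(detok, [b"\xc3", b"\xa9"]) == ("é", 2)
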
@@ -1065,7 +1070,6 @@ def logit_bias_processor(
 
             if len(completion_tokens) >= max_tokens:
                 text = self.detokenize(completion_tokens)
-
                 finish_reason = "length"
                 break

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule commit pointer; the remaining +1/-1 of the 13/9 total)
