@@ -948,8 +948,7 @@ def logit_bias_processor(
948
948
949
949
if stream :
950
950
remaining_tokens = completion_tokens [returned_tokens :]
951
- prev_tokens = completion_tokens [:returned_tokens ]
952
- remaining_text = self .detokenize (completion_tokens , prev_tokens )
951
+ remaining_text = self .detokenize (remaining_tokens )
953
952
remaining_length = len (remaining_text )
954
953
955
954
# We want to avoid yielding any characters from
@@ -971,13 +970,13 @@ def logit_bias_processor(
971
970
for token in remaining_tokens :
972
971
if token == self .token_bos ():
973
972
continue
974
- token_end_position += len (remaining_text )
973
+ token_end_position += len (self . detokenize ([ token ]) )
975
974
# Check if stop sequence is in the token
976
975
if token_end_position > (
977
976
remaining_length - first_stop_position
978
977
):
979
978
break
980
- token_str = remaining_text .decode (
979
+ token_str = self . detokenize ([ token ]) .decode (
981
980
"utf-8" , errors = "ignore"
982
981
)
983
982
text_offset = len (prompt ) + len (
@@ -1002,7 +1001,11 @@ def logit_bias_processor(
1002
1001
}
1003
1002
top_logprob .update ({token_str : current_logprobs [int (token )]})
1004
1003
logprobs_or_none = {
1005
- "tokens" : [token_str ],
1004
+ "tokens" : [
1005
+ self .detokenize ([token ]).decode (
1006
+ "utf-8" , errors = "ignore"
1007
+ )
1008
+ ],
1006
1009
"text_offset" : [text_offset ],
1007
1010
"token_logprobs" : [current_logprobs [int (token )]],
1008
1011
"top_logprobs" : [top_logprob ],
@@ -1015,7 +1018,9 @@ def logit_bias_processor(
1015
1018
"model" : model_name ,
1016
1019
"choices" : [
1017
1020
{
1018
- "text" : token_str ,
1021
+ "text" : self .detokenize ([token ]).decode (
1022
+ "utf-8" , errors = "ignore"
1023
+ ),
1019
1024
"index" : 0 ,
1020
1025
"logprobs" : logprobs_or_none ,
1021
1026
"finish_reason" : None ,
@@ -1027,7 +1032,7 @@ def logit_bias_processor(
1027
1032
decode_success = False
1028
1033
for i in range (1 , len (remaining_tokens ) + 1 ):
1029
1034
try :
1030
- bs = remaining_text
1035
+ bs = self . detokenize ( remaining_tokens [: i ])
1031
1036
ts = bs .decode ("utf-8" )
1032
1037
decode_success = True
1033
1038
break
@@ -1063,7 +1068,6 @@ def logit_bias_processor(
1063
1068
1064
1069
if len (completion_tokens ) >= max_tokens :
1065
1070
text = self .detokenize (completion_tokens )
1066
-
1067
1071
finish_reason = "length"
1068
1072
break
1069
1073
0 commit comments