Fix #1038 · sunnykim1206/llama-cpp-python@e1cd61e · GitHub

Commit e1cd61e

1 parent b1e9962 commit e1cd61e
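
Summary (inferred from the diff below, since the commit carries no description): streamed completions now hold the final token back so the last chunk can be emitted with its finish_reason, the evaluated state is saved to the cache before the stream ends, and the chat-format converter gains a separate OpenAI-style terminal chunk with an empty delta.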

File tree

2 files changed: +24 -7 lines changed


llama_cpp/llama.py

Lines changed: 8 additions & 3 deletions
@@ -872,7 +872,7 @@ def _completion_response(text: str, finish_reason: Literal["stop", "length"], lo
                 break

         if stream:
-            remaining_tokens = completion_tokens[returned_tokens:]
+            remaining_tokens = completion_tokens[returned_tokens:-1]
             remaining_text = self.detokenize(remaining_tokens)
             remaining_length = len(remaining_text)

@@ -1030,9 +1030,14 @@ def _completion_response(text: str, finish_reason: Literal["stop", "length"], lo
                         break
                     returned_tokens += 1
                     yield _completion_stream_response(
-                        text=last_text[: len(last_text) - (token_end_position - end)].decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none
+                        text=last_text[: len(last_text) - (token_end_position - end)].decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none, finish_reason=finish_reason
                     )
-                    break
+                    if self.cache:
+                        if self.verbose:
+                            print("Llama._create_completion: cache save", file=sys.stderr)
+                        self.cache[prompt_tokens + completion_tokens] = self.save_state()
+                        print("Llama._create_completion: cache saved", file=sys.stderr)
+                    return
                 returned_tokens += 1
                 yield _completion_stream_response(
                     text=self.detokenize([token]).decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none
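
In llama.py, the streaming path now holds the final token out of the remaining-text pass (completion_tokens[returned_tokens:-1]), emits the last chunk with its finish_reason attached, and saves the prompt-plus-completion state to the cache before returning. Below is a minimal consumer-side sketch of what a caller can now rely on; it uses the standard llama-cpp-python high-level API with a placeholder model path, and is not code from this commit:

    from llama_cpp import Llama

    llm = Llama(model_path="./model.gguf")  # placeholder model path

    # Stream a text completion; chunks follow the OpenAI-style
    # completion schema that llama-cpp-python yields.
    for chunk in llm("Q: Name the planets in order. A:", max_tokens=48, stream=True):
        choice = chunk["choices"][0]
        print(choice["text"], end="", flush=True)
        if choice["finish_reason"] is not None:
            # With this fix the final chunk reports "stop" or "length"
            # rather than ending the stream with finish_reason unset.
            print(f"\n[finish_reason: {choice['finish_reason']}]")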

llama_cpp/llama_chat_format.py

Lines changed: 16 additions & 4 deletions
@@ -260,13 +260,25 @@ def _convert_text_completion_chunks_to_chat(
                     "index": 0,
                     "delta": {
                         "content": chunk["choices"][0]["text"],
-                    }
-                    if chunk["choices"][0]["finish_reason"] is None
-                    else {},
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
+                    },
+                    "finish_reason": None,
                 }
             ],
         }
+        if chunk["choices"][0]["finish_reason"] is not None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {},
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+            }


 def _convert_completion_to_chat(
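
After this change, _convert_text_completion_chunks_to_chat always yields content deltas with finish_reason set to None and then one extra terminal chunk whose delta is empty, which is how the OpenAI chat streaming protocol signals completion. A minimal sketch of consuming that sequence through the public chat API follows; the model path and chat_format value are placeholder assumptions, not part of this commit:

    from llama_cpp import Llama

    llm = Llama(model_path="./model.gguf", chat_format="llama-2")  # placeholders

    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    for chunk in stream:
        choice = chunk["choices"][0]
        if choice["finish_reason"] is None:
            # Role/content deltas: finish_reason stays None until the end.
            print(choice["delta"].get("content", ""), end="", flush=True)
        else:
            # Terminal chunk introduced by this commit: empty delta,
            # finish_reason carries "stop" or "length".
            print(f"\n[finish_reason: {choice['finish_reason']}]")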

0 commit comments