Now the last token is sent when stream=True

ctejada85 · ctejada85 · commit 0756a2d3fbea · 2023-07-19T22:47:14.000-04:00
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
@@ -953,7 +953,7 @@ def _create_completion(
                     token_end_position += len(self.detokenize([token]))
                     # Check if stop sequence is in the token
                     if token_end_position >= (
-                        remaining_length - first_stop_position - 1
+                        remaining_length - first_stop_position
                     ):
                         break
                     logprobs_or_none: Optional[CompletionLogprobs] = None