From 89ae8bde4c08968b7978d5b9ae5e6c445542e476 Mon Sep 17 00:00:00 2001
From: gmcgoldr
Date: Fri, 6 Oct 2023 21:13:11 -0400
Subject: [PATCH] Fix streaming doesn't return finish reason

When streaming, the yield that contains the finish reason can be skipped.
This change ensures that yield isn't skipped.
---
 llama_cpp/llama.py | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index fdde7ea01..b43a354b3 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1228,20 +1228,6 @@ def _create_completion(
                             }
                         ],
                     }
-                    yield {
-                        "id": completion_id,
-                        "object": "text_completion",
-                        "created": created,
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "text": "",
-                                "index": 0,
-                                "logprobs": None,
-                                "finish_reason": finish_reason,
-                            }
-                        ],
-                    }
                     break
                 returned_tokens += 1
                 yield {
@@ -1260,20 +1246,20 @@ def _create_completion(
                         }
                     ],
                 }
-                yield {
-                    "id": completion_id,
-                    "object": "text_completion",
-                    "created": created,
-                    "model": model_name,
-                    "choices": [
-                        {
-                            "text": "",
-                            "index": 0,
-                            "logprobs": None,
-                            "finish_reason": finish_reason,
-                        }
-                    ],
-                }
+            yield {
+                "id": completion_id,
+                "object": "text_completion",
+                "created": created,
+                "model": model_name,
+                "choices": [
+                    {
+                        "text": "",
+                        "index": 0,
+                        "logprobs": None,
+                        "finish_reason": finish_reason,
+                    }
+                ],
+            }
             if self.cache:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
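
For context, a minimal consumer-side sketch of the behavior this patch guarantees: when streaming, the final chunk should always carry a non-null finish_reason ("stop" or "length"). The model path below is a placeholder, and the snippet only assumes the public llama_cpp Python API (Llama(...) called with stream=True); it is illustrative and not part of the patch.

    from llama_cpp import Llama

    # Placeholder path: point this at a real GGUF model file.
    llm = Llama(model_path="./models/model.gguf")

    finish_reason = None
    for chunk in llm("Q: Name the planets in the solar system. A:",
                     max_tokens=16, stream=True):
        choice = chunk["choices"][0]
        print(choice["text"], end="", flush=True)
        # With this fix, the last streamed chunk has an empty "text" field
        # and a non-null "finish_reason"; previously that chunk could be skipped.
        if choice["finish_reason"] is not None:
            finish_reason = choice["finish_reason"]

    print()
    print("finish_reason:", finish_reason)  # expect "stop" or "length", not None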