Use server sent events function for streaming completion · chabotsi/llama-cpp-python@dc39cc0
Commit dc39cc0

Use server sent events function for streaming completion

1 parent f0ec6e6 commit dc39cc0

File tree

1 file changed: +8 −1 lines changed


llama_cpp/server/app.py

Lines changed: 8 additions & 1 deletion
@@ -259,8 +259,15 @@ def create_completion(
             )
         )
     if request.stream:
+
+        async def server_sent_events(
+            chunks: Iterator[llama_cpp.CompletionChunk],
+        ):
+            for chunk in chunks:
+                yield dict(data=json.dumps(chunk))
+
         chunks: Iterator[llama_cpp.CompletionChunk] = completion_or_chunks  # type: ignore
-        return EventSourceResponse(dict(data=json.dumps(chunk)) for chunk in chunks)
+        return EventSourceResponse(server_sent_events(chunks))
     completion: llama_cpp.Completion = completion_or_chunks  # type: ignore
     return completion
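The change replaces an inline generator expression with a named async generator that wraps each completion chunk as an SSE payload dict for sse-starlette's `EventSourceResponse`. The generator can be exercised on its own, outside FastAPI; below is a minimal sketch where `CompletionChunk` is a hypothetical dict stand-in for `llama_cpp.CompletionChunk` and `collect` is a helper added purely for demonstration:

```python
import asyncio
import json
from typing import Any, Dict, Iterator

# Hypothetical stand-in: real chunks from llama-cpp-python's streaming
# completion API are dictionaries, so a plain Dict alias suffices here.
CompletionChunk = Dict[str, Any]


async def server_sent_events(chunks: Iterator[CompletionChunk]):
    # Each yielded dict becomes one "data: ..." frame when the generator
    # is handed to sse-starlette's EventSourceResponse.
    for chunk in chunks:
        yield dict(data=json.dumps(chunk))


async def collect(chunks: Iterator[CompletionChunk]):
    # Demo helper (not part of the commit): drain the async generator
    # into a list so the frames can be inspected.
    return [event async for event in server_sent_events(chunks)]


if __name__ == "__main__":
    sample = iter(
        [{"choices": [{"text": "Hel"}]}, {"choices": [{"text": "lo"}]}]
    )
    for event in asyncio.run(collect(sample)):
        print(event["data"])
```

Hoisting the generator into a named `async def` also means the response body is produced by a true async generator rather than a sync generator expression, which is the form `EventSourceResponse` streams most naturally.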

0 commit comments
