Merge pull request #1 from ryan1117001/develop · abetlen/llama-cpp-python@207a5c3

Commit 207a5c3

Merge pull request #1 from ryan1117001/develop

Develop

2 parents a14b49d + 7bd9a79

File tree

3 files changed: +52 -20 lines

Makefile
llama_cpp/llama.py
llama_cpp/llama_chat_format.py


Makefile

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 update:
-	poetry install
+	# poetry install
 	git submodule update --init --recursive
 
 update.vendor:
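The only change here is commenting out the poetry install step of the update target; the submodule sync is kept, so, presumably, make update no longer requires Poetry to be available.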

llama_cpp/llama.py

Lines changed: 5 additions & 0 deletions

@@ -1517,6 +1517,11 @@ def logit_bias_processor(
                     "finish_reason": finish_reason,
                 }
             ],
+            "usage": {
+                "prompt_tokens": len(prompt_tokens),
+                "completion_tokens": len(completion_tokens),
+                "total_tokens": len(prompt_tokens) + len(completion_tokens),
+            },
         }
         if self.cache:
             if self.verbose:
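For context, this hunk attaches token accounting to the chunk that carries finish_reason in the streaming path, counting prompt and generated tokens with len(). Below is a minimal consumer sketch, not part of the commit: it assumes llama-cpp-python with this patch applied and a local GGUF model (the ./models/model.gguf path is hypothetical).

```python
# Minimal sketch (not from the commit): stream a completion and pick up the
# "usage" dict that this patch adds to the chunk carrying finish_reason.
# Assumes a patched llama-cpp-python and a hypothetical local model path.
from llama_cpp import Llama

llm = Llama(model_path="./models/model.gguf", verbose=False)

usage = None
for chunk in llm.create_completion("Q: 2 + 2 = ", max_tokens=8, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
    usage = chunk.get("usage", usage)  # present once finish_reason is set

print()
if usage is not None:
    print(usage["prompt_tokens"], "prompt +",
          usage["completion_tokens"], "completion =",
          usage["total_tokens"], "total tokens")
```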

llama_cpp/llama_chat_format.py

Lines changed: 46 additions & 19 deletions

@@ -304,26 +304,53 @@ def _convert_text_completion_chunks_to_chat(
             }
         ],
     }
-    yield {
-        "id": "chat" + chunk["id"],
-        "model": chunk["model"],
-        "created": chunk["created"],
-        "object": "chat.completion.chunk",
-        "choices": [
-            {
-                "index": 0,
-                "delta": (
-                    {
-                        "content": chunk["choices"][0]["text"],
-                    }
-                    if chunk["choices"][0]["finish_reason"] is None
-                    else {}
-                ),
-                "logprobs": chunk["choices"][0]["logprobs"],
-                "finish_reason": chunk["choices"][0]["finish_reason"],
+    if chunk["choices"][0]["finish_reason"] is None:
+        yield {
+            "id": "chat" + chunk["id"],
+            "model": chunk["model"],
+            "created": chunk["created"],
+            "object": "chat.completion.chunk",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": (
+                        {
+                            "content": chunk["choices"][0]["text"],
+                        }
+                        if chunk["choices"][0]["finish_reason"] is None
+                        else {}
+                    ),
+                    "logprobs": chunk["choices"][0]["logprobs"],
+                    "finish_reason": chunk["choices"][0]["finish_reason"],
+                }
+            ]
+        }
+    else:
+        yield {
+            "id": "chat" + chunk["id"],
+            "model": chunk["model"],
+            "created": chunk["created"],
+            "object": "chat.completion.chunk",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": (
+                        {
+                            "content": chunk["choices"][0]["text"],
+                        }
+                        if chunk["choices"][0]["finish_reason"] is None
+                        else {}
+                    ),
+                    "logprobs": chunk["choices"][0]["logprobs"],
+                    "finish_reason": chunk["choices"][0]["finish_reason"],
+                }
+            ],
+            "usage": {
+                "prompt_tokens": chunk["usage"]["prompt_tokens"],
+                "completion_tokens": chunk["usage"]["completion_tokens"],
+                "total_tokens": chunk["usage"]["total_tokens"],
             }
-        ],
-    }
+        }
 
 
 def _convert_completion_to_chat(
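On the chat side, the restructuring duplicates the chunk construction into two branches so that only the terminal chunk (the one whose finish_reason is set) gains a "usage" entry, copied from the underlying completion chunk. Note that after the split, the inner delta conditional is redundant in both branches (its condition is already decided by the enclosing if/else); it appears to be carried over verbatim from the original code. A hedged consumer sketch, again assuming the patched library and a hypothetical model path:

```python
# Sketch (not from the commit): under this patch, only the terminal chat
# chunk -- the one with a non-None finish_reason -- should carry "usage".
from llama_cpp import Llama

llm = Llama(model_path="./models/model.gguf", verbose=False)

for chunk in llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one word."}],
    max_tokens=8,
    stream=True,
):
    choice = chunk["choices"][0]
    delta = choice.get("delta", {})
    if "content" in delta:
        print(delta["content"], end="", flush=True)
    if choice.get("finish_reason") is not None:
        # Forwarded by _convert_text_completion_chunks_to_chat in this diff.
        print("\nusage:", chunk.get("usage"))
```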
