diff --git a/Makefile b/Makefile
index be9b55a3e..586da224b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 update:
-	poetry install
+	# poetry install
 	git submodule update --init --recursive
 
 update.vendor:
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 7fac936d1..c9fa80161 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1529,6 +1529,11 @@ def logit_bias_processor(
                         "finish_reason": finish_reason,
                     }
                 ],
+                "usage": {
+                    "prompt_tokens": len(prompt_tokens),
+                    "completion_tokens": len(completion_tokens),
+                    "total_tokens": len(prompt_tokens) + len(completion_tokens),
+                },
             }
             if self.cache:
                 if self.verbose:
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index dfb0af65e..cf6fcaee1 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -304,26 +304,53 @@ def _convert_text_completion_chunks_to_chat(
                     }
                 ],
             }
-        yield {
-            "id": "chat" + chunk["id"],
-            "model": chunk["model"],
-            "created": chunk["created"],
-            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": (
-                        {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {}
-                    ),
-                    "logprobs": chunk["choices"][0]["logprobs"],
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
+        if chunk["choices"][0]["finish_reason"] is None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ]
+            }
+        else:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": chunk["usage"]["prompt_tokens"],
+                    "completion_tokens": chunk["usage"]["completion_tokens"],
+                    "total_tokens": chunk["usage"]["total_tokens"],
                 }
-            ],
-        }
+            }
 
 
 def _convert_completion_to_chat(
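
Note on consuming the new field: with the two Python hunks above applied, streamed chat responses follow the OpenAI convention of attaching a "usage" object (prompt, completion, and total token counts) to the final chunk, i.e. the one whose finish_reason is set. A minimal consumer sketch, assuming a build that includes this patch; the model path is a placeholder:

from llama_cpp import Llama

llm = Llama(model_path="./models/model.gguf")  # placeholder path, substitute a real GGUF file

usage = None
for chunk in llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
):
    delta = chunk["choices"][0].get("delta", {})
    if "content" in delta:
        print(delta["content"], end="", flush=True)
    # Only the final chunk (finish_reason set) carries "usage" after this patch.
    usage = chunk.get("usage", usage)

print()
if usage is not None:
    print(usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])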
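
The llama.py hunk attaches the same counts to the final chunk of a plain (non-chat) streamed completion, computed server-side as len(prompt_tokens) and len(completion_tokens), so that path can be read the same way. A sketch under the same assumptions as above:

usage = None
for chunk in llm.create_completion("Q: Name three colors. A:", stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
    usage = chunk.get("usage", usage)

print()
print(usage)  # e.g. {'prompt_tokens': ..., 'completion_tokens': ..., 'total_tokens': ...}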