From 3a07b905869ac40de44fbc8dfe372fe21d753306 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Sun, 16 Jun 2024 18:57:10 -0400
Subject: [PATCH 1/3] feat: stream token usage

---
 llama_cpp/llama.py             |  5 +++
 llama_cpp/llama_chat_format.py | 65 ++++++++++++++++++++++++----------
 2 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 005045f5c..c7cb0d9a6 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1517,6 +1517,11 @@ def logit_bias_processor(
                         "finish_reason": finish_reason,
                     }
                 ],
+                "usage": {
+                    "prompt_tokens": len(prompt_tokens),
+                    "completion_tokens": len(completion_tokens),
+                    "total_tokens": len(prompt_tokens) + len(completion_tokens),
+                },
             }
         if self.cache:
             if self.verbose:
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index ea8d07feb..f6d54e15b 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -304,26 +304,53 @@ def _convert_text_completion_chunks_to_chat(
                 }
             ],
         }
-        yield {
-            "id": "chat" + chunk["id"],
-            "model": chunk["model"],
-            "created": chunk["created"],
-            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": (
-                        {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {}
-                    ),
-                    "logprobs": chunk["choices"][0]["logprobs"],
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
-                }
-            ],
-        }
+        if chunk["choices"][0]["finish_reason"] is None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ]
+            }
+        else:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": chunk["usage"]["prompt_tokens"],
+                    "completion_tokens": chunk["usage"]["completion_tokens"],
+                    "total_tokens": chunk["usage"]["total_tokens"],
+                }
+            }


 def _convert_completion_to_chat(

From d487d22ae3fdf4f7d1e52d002ff0375d384d6656 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Sun, 16 Jun 2024 18:57:33 -0400
Subject: [PATCH 2/3] feat: makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index be9b55a3e..586da224b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 update:
-	poetry install
+	# poetry install
 	git submodule update --init --recursive

 update.vendor:

From c9ec9c81d73ac4e835c0d2e5d98ef289a0def3a6 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Wed, 7 Aug 2024 21:04:11 -0400
Subject: [PATCH 3/3] fix: extra parenthesis

---
 llama_cpp/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index d53359c62..aabc019f2 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -2165,7 +2165,7 @@ def from_pretrained(
         files = [
             file["name"] if isinstance(file, dict) else file
-            for file in hffs.ls(repo_id, recursive=True))
+            for file in hffs.ls(repo_id, recursive=True)
         ]

         # split each file into repo_id, subfolder, filename
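
For reviewers, a minimal consumer-side sketch of what PATCH 1/3 surfaces to a client, assuming llama-cpp-python's dict-based streaming API; the model path and prompt below are placeholders, not part of the patches:

    from llama_cpp import Llama

    llm = Llama(model_path="./models/example.gguf")  # placeholder path

    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    for chunk in stream:
        choice = chunk["choices"][0]
        if choice["finish_reason"] is None:
            # Intermediate chunks: the delta carries the generated text.
            print(choice["delta"].get("content", ""), end="", flush=True)
        else:
            # Final chunk: with these patches it also carries the usage block.
            usage = chunk.get("usage")
            if usage is not None:
                print(
                    f"\nprompt={usage['prompt_tokens']}"
                    f" completion={usage['completion_tokens']}"
                    f" total={usage['total_tokens']}"
                )

Note that the patches attach "usage" to the chunk whose finish_reason is set, rather than emitting a separate trailing usage-only chunk, so clients can read the totals from the last chunk they receive.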