From 3a07b905869ac40de44fbc8dfe372fe21d753306 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Sun, 16 Jun 2024 18:57:10 -0400
Subject: [PATCH 1/3] feat: stream token usage

---
 llama_cpp/llama.py             |  5 +++
 llama_cpp/llama_chat_format.py | 65 ++++++++++++++++++++++++----------
 2 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 005045f5c..c7cb0d9a6 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1517,6 +1517,11 @@ def logit_bias_processor(
                         "finish_reason": finish_reason,
                     }
                 ],
+                "usage": {
+                    "prompt_tokens": len(prompt_tokens),
+                    "completion_tokens": len(completion_tokens),
+                    "total_tokens": len(prompt_tokens) + len(completion_tokens),
+                },
             }
         if self.cache:
             if self.verbose:
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index ea8d07feb..f6d54e15b 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -304,26 +304,53 @@ def _convert_text_completion_chunks_to_chat(
                 }
             ],
         }
-        yield {
-            "id": "chat" + chunk["id"],
-            "model": chunk["model"],
-            "created": chunk["created"],
-            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": (
-                        {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {}
-                    ),
-                    "logprobs": chunk["choices"][0]["logprobs"],
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
-                }
-            ],
-        }
+        if chunk["choices"][0]["finish_reason"] is None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ]
+            }
+        else:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": chunk["usage"]["prompt_tokens"],
+                    "completion_tokens": chunk["usage"]["completion_tokens"],
+                    "total_tokens": chunk["usage"]["total_tokens"],
+                }
+            }


 def _convert_completion_to_chat(

From d487d22ae3fdf4f7d1e52d002ff0375d384d6656 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Sun, 16 Jun 2024 18:57:33 -0400
Subject: [PATCH 2/3] feat: makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index be9b55a3e..586da224b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 update:
-	poetry install
+	# poetry install
 	git submodule update --init --recursive

 update.vendor:

From c9ec9c81d73ac4e835c0d2e5d98ef289a0def3a6 Mon Sep 17 00:00:00 2001
From: Ryan Hua
Date: Wed, 7 Aug 2024 21:04:11 -0400
Subject: [PATCH 3/3] fix: extra parenthesis

---
 llama_cpp/llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index d53359c62..aabc019f2 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -2165,7 +2165,7 @@ def from_pretrained(
         files = [
             file["name"] if isinstance(file, dict) else file
-            for file in hffs.ls(repo_id, recursive=True))
+            for file in hffs.ls(repo_id, recursive=True)
         ]

         # split each file into repo_id, subfolder, filename
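
For reviewers, a minimal consumer-side sketch of what PATCH 1/3 surfaces to a client, assuming llama-cpp-python's dict-based streaming API; the model path and prompt below are placeholders, not part of the patches:

    from llama_cpp import Llama

    llm = Llama(model_path="./models/example.gguf")  # placeholder path

    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )

    for chunk in stream:
        choice = chunk["choices"][0]
        if choice["finish_reason"] is None:
            # Intermediate chunks: the delta carries the generated text.
            print(choice["delta"].get("content", ""), end="", flush=True)
        else:
            # Final chunk: with these patches it also carries the usage block.
            usage = chunk.get("usage")
            if usage is not None:
                print(
                    f"\nprompt={usage['prompt_tokens']}"
                    f" completion={usage['completion_tokens']}"
                    f" total={usage['total_tokens']}"
                )

Note that the patches attach "usage" to the chunk whose finish_reason is set, rather than emitting a separate trailing usage-only chunk, so clients can read the totals from the last chunk they receive.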