diff --git a/Makefile b/Makefile
index be9b55a3e..586da224b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 update:
-	poetry install
+	# poetry install
 	git submodule update --init --recursive
 
 update.vendor:
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 7fac936d1..c9fa80161 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1529,6 +1529,11 @@ def logit_bias_processor(
                         "finish_reason": finish_reason,
                     }
                 ],
+                "usage": {
+                    "prompt_tokens": len(prompt_tokens),
+                    "completion_tokens": len(completion_tokens),
+                    "total_tokens": len(prompt_tokens) + len(completion_tokens),
+                },
             }
             if self.cache:
                 if self.verbose:
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index dfb0af65e..cf6fcaee1 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -304,26 +304,53 @@ def _convert_text_completion_chunks_to_chat(
                     }
                 ],
             }
-        yield {
-            "id": "chat" + chunk["id"],
-            "model": chunk["model"],
-            "created": chunk["created"],
-            "object": "chat.completion.chunk",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": (
-                        {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {}
-                    ),
-                    "logprobs": chunk["choices"][0]["logprobs"],
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
+        if chunk["choices"][0]["finish_reason"] is None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ]
+            }
+        else:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": (
+                            {
+                                "content": chunk["choices"][0]["text"],
+                            }
+                            if chunk["choices"][0]["finish_reason"] is None
+                            else {}
+                        ),
+                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": chunk["usage"]["prompt_tokens"],
+                    "completion_tokens": chunk["usage"]["completion_tokens"],
+                    "total_tokens": chunk["usage"]["total_tokens"],
                 }
-            ],
-        }
+            }
 
 
 def _convert_completion_to_chat(
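
Note on consuming the new field: with the two Python hunks above applied, streamed chat responses follow the OpenAI convention of attaching a "usage" object (prompt, completion, and total token counts) to the final chunk, i.e. the one whose finish_reason is set. A minimal consumer sketch, assuming a build that includes this patch; the model path is a placeholder:

from llama_cpp import Llama

llm = Llama(model_path="./models/model.gguf")  # placeholder path, substitute a real GGUF file

usage = None
for chunk in llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
):
    delta = chunk["choices"][0].get("delta", {})
    if "content" in delta:
        print(delta["content"], end="", flush=True)
    # Only the final chunk (finish_reason set) carries "usage" after this patch.
    usage = chunk.get("usage", usage)

print()
if usage is not None:
    print(usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])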
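
The llama.py hunk attaches the same counts to the final chunk of a plain (non-chat) streamed completion, computed server-side as len(prompt_tokens) and len(completion_tokens), so that path can be read the same way. A sketch under the same assumptions as above:

usage = None
for chunk in llm.create_completion("Q: Name three colors. A:", stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
    usage = chunk.get("usage", usage)

print()
print(usage)  # e.g. {'prompt_tokens': ..., 'completion_tokens': ..., 'total_tokens': ...}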