From 1d8d61303b487755b320c8bb483cef46dd53f5c4 Mon Sep 17 00:00:00 2001
From: Tianyang Zhang
Date: Tue, 27 Jan 2026 22:29:08 +0000
Subject: [PATCH] Add language model API to return token usage

---
 .../openai_responses_language_model.py        | 43 +++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/memmachine/common/language_model/openai_responses_language_model.py b/src/memmachine/common/language_model/openai_responses_language_model.py
index 3a2e72ba2..b735b763f 100644
--- a/src/memmachine/common/language_model/openai_responses_language_model.py
+++ b/src/memmachine/common/language_model/openai_responses_language_model.py
@@ -191,6 +191,44 @@ async def generate_response(  # noqa: C901
         tools: list[dict[str, Any]] | None = None,
         tool_choice: str | dict[str, str] | None = None,
         max_attempts: int = 1,
+    ) -> tuple[str, Any]:
+        output, function_calls_arguments, _, _ = await self._generate_response(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            tools=tools,
+            tool_choice=tool_choice,
+            max_attempts=max_attempts,
+        )
+        return output, function_calls_arguments
+
+    async def generate_response_with_token_usage(
+        self,
+        system_prompt: str | None = None,
+        user_prompt: str | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: str | dict[str, str] | None = None,
+        max_attempts: int = 1,
+    ) -> tuple[str, Any, int, int]:
+        """Generate a response and also report token usage.
+
+        Returns a tuple of (output_text, function_calls_arguments,
+        input_tokens, output_tokens).
+        """
+        return await self._generate_response(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            tools=tools,
+            tool_choice=tool_choice,
+            max_attempts=max_attempts,
+        )
+
+    async def _generate_response(
+        self,
+        system_prompt: str | None = None,
+        user_prompt: str | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: str | dict[str, str] | None = None,
+        max_attempts: int = 1,
-    ) -> tuple[str, Any]:
+    ) -> tuple[str, Any, int, int]:
         """Generate a raw text response (and optional tool call)."""
         if max_attempts <= 0:
@@ -320,6 +358,9 @@ async def generate_response(  # noqa: C901
         return (
             response.output_text,
             function_calls_arguments,
+            # usage may be absent (Optional in the SDK); report 0 rather than crash
+            response.usage.input_tokens if response.usage else 0,
+            response.usage.output_tokens if response.usage else 0,
         )

     def _collect_metrics(