From 4a529e6c167ce2f25d7d19f18edc6dfcef59aca8 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Wed, 4 Jun 2025 11:53:17 -0400 Subject: [PATCH 01/11] Add REPL run_demo_loop helper (#811) --- docs/ja/repl.md | 22 ++++++++++++++ docs/ref/repl.md | 6 ++++ docs/repl.md | 19 ++++++++++++ mkdocs.yml | 3 ++ src/agents/__init__.py | 2 ++ src/agents/repl.py | 65 ++++++++++++++++++++++++++++++++++++++++++ tests/test_repl.py | 28 ++++++++++++++++++ 7 files changed, 145 insertions(+) create mode 100644 docs/ja/repl.md create mode 100644 docs/ref/repl.md create mode 100644 docs/repl.md create mode 100644 src/agents/repl.py create mode 100644 tests/test_repl.py diff --git a/docs/ja/repl.md b/docs/ja/repl.md new file mode 100644 index 000000000..108c12b90 --- /dev/null +++ b/docs/ja/repl.md @@ -0,0 +1,22 @@ +--- +search: + exclude: true +--- +# REPL ユーティリティ + +`run_demo_loop` を使うと、ターミナルから手軽にエージェントを試せます。 + +```python +import asyncio +from agents import Agent, run_demo_loop + +async def main() -> None: + agent = Agent(name="Assistant", instructions="あなたは親切なアシスタントです") + await run_demo_loop(agent) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +`run_demo_loop` は入力を繰り返し受け取り、会話履歴を保持したままエージェントを実行します。既定ではストリーミング出力を表示します。 +`quit` または `exit` と入力するか `Ctrl-D` を押すと終了します。 diff --git a/docs/ref/repl.md b/docs/ref/repl.md new file mode 100644 index 000000000..a064a9bff --- /dev/null +++ b/docs/ref/repl.md @@ -0,0 +1,6 @@ +# `repl` + +::: agents.repl + options: + members: + - run_demo_loop diff --git a/docs/repl.md b/docs/repl.md new file mode 100644 index 000000000..073b87f51 --- /dev/null +++ b/docs/repl.md @@ -0,0 +1,19 @@ +# REPL utility + +The SDK provides `run_demo_loop` for quick interactive testing. + +```python +import asyncio +from agents import Agent, run_demo_loop + +async def main() -> None: + agent = Agent(name="Assistant", instructions="You are a helpful assistant.") + await run_demo_loop(agent) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +`run_demo_loop` prompts for user input in a loop, keeping the conversation +history between turns. By default it streams model output as it is produced. +Type `quit` or `exit` (or press `Ctrl-D`) to leave the loop. 
diff --git a/mkdocs.yml b/mkdocs.yml index ad719670c..a58258a93 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -59,6 +59,7 @@ plugins: - running_agents.md - results.md - streaming.md + - repl.md - tools.md - mcp.md - handoffs.md @@ -80,6 +81,7 @@ plugins: - ref/index.md - ref/agent.md - ref/run.md + - ref/repl.md - ref/tool.md - ref/result.md - ref/stream_events.md @@ -139,6 +141,7 @@ plugins: - running_agents.md - results.md - streaming.md + - repl.md - tools.md - mcp.md - handoffs.md diff --git a/src/agents/__init__.py b/src/agents/__init__.py index 820616437..ee2f2aa16 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -45,6 +45,7 @@ from .models.openai_chatcompletions import OpenAIChatCompletionsModel from .models.openai_provider import OpenAIProvider from .models.openai_responses import OpenAIResponsesModel +from .repl import run_demo_loop from .result import RunResult, RunResultStreaming from .run import RunConfig, Runner from .run_context import RunContextWrapper, TContext @@ -160,6 +161,7 @@ def enable_verbose_stdout_logging(): "ToolsToFinalOutputFunction", "ToolsToFinalOutputResult", "Runner", + "run_demo_loop", "Model", "ModelProvider", "ModelTracing", diff --git a/src/agents/repl.py b/src/agents/repl.py new file mode 100644 index 000000000..9a4f30759 --- /dev/null +++ b/src/agents/repl.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import Any + +from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent + +from .agent import Agent +from .items import ItemHelpers, TResponseInputItem +from .result import RunResultBase +from .run import Runner +from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent, RunItemStreamEvent + + +async def run_demo_loop(agent: Agent[Any], *, stream: bool = True) -> None: + """Run a simple REPL loop with the given agent. + + This utility allows quick manual testing and debugging of an agent from the + command line. Conversation state is preserved across turns. Enter ``exit`` + or ``quit`` to stop the loop. + + Args: + agent: The starting agent to run. + stream: Whether to stream the agent output. 
+ """ + + current_agent = agent + input_items: list[TResponseInputItem] = [] + while True: + try: + user_input = input(" > ") + except (EOFError, KeyboardInterrupt): + print() + break + if user_input.strip().lower() in {"exit", "quit"}: + break + if not user_input: + continue + + input_items.append({"role": "user", "content": user_input}) + + result: RunResultBase + if stream: + result = Runner.run_streamed(current_agent, input=input_items) + async for event in result.stream_events(): + if isinstance(event, RawResponsesStreamEvent): + if isinstance(event.data, ResponseTextDeltaEvent): + print(event.data.delta, end="", flush=True) + elif isinstance(event, RunItemStreamEvent): + if event.item.type == "tool_call_item": + print("\n[tool called]", flush=True) + elif event.item.type == "tool_call_output_item": + print(f"\n[tool output: {event.item.output}]", flush=True) + elif event.item.type == "message_output_item": + message = ItemHelpers.text_message_output(event.item) + print(message, end="", flush=True) + elif isinstance(event, AgentUpdatedStreamEvent): + print(f"\n[Agent updated: {event.new_agent.name}]", flush=True) + print() + else: + result = await Runner.run(current_agent, input_items) + if result.final_output is not None: + print(result.final_output) + + current_agent = result.last_agent + input_items = result.to_input_list() diff --git a/tests/test_repl.py b/tests/test_repl.py new file mode 100644 index 000000000..7ba2011be --- /dev/null +++ b/tests/test_repl.py @@ -0,0 +1,28 @@ +import pytest + +from agents import Agent, run_demo_loop + +from .fake_model import FakeModel +from .test_responses import get_text_input_item, get_text_message + + +@pytest.mark.asyncio +async def test_run_demo_loop_conversation(monkeypatch, capsys): + model = FakeModel() + model.add_multiple_turn_outputs([[get_text_message("hello")], [get_text_message("good")]]) + + agent = Agent(name="test", model=model) + + inputs = iter(["Hi", "How are you?", "quit"]) + monkeypatch.setattr("builtins.input", lambda _=" > ": next(inputs)) + + await run_demo_loop(agent, stream=False) + + output = capsys.readouterr().out + assert "hello" in output + assert "good" in output + assert model.last_turn_args["input"] == [ + get_text_input_item("Hi"), + get_text_message("hello").model_dump(exclude_unset=True), + get_text_input_item("How are you?"), + ] From 05db7a68cd779ff0af0a91e190b6b9010b24f6a6 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Wed, 4 Jun 2025 15:37:09 -0400 Subject: [PATCH 02/11] Crosslink to js/ts (#815) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 7dcd97b33..785177916 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ The OpenAI Agents SDK is a lightweight yet powerful framework for building multi Image of the Agents Tracing UI +> [!NOTE] +> Looking for the JavaScript/TypeScript version? Check out [Agents SDK JS/TS](https://github.com/openai/openai-agents-js). + ### Core concepts: 1. 
[**Agents**](https://openai.github.io/openai-agents-python/agents): LLMs configured with instructions, tools, guardrails, and handoffs From 5c7c678aeffbaf33c5114460fb481a5584e58d20 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Wed, 4 Jun 2025 16:57:19 -0400 Subject: [PATCH 03/11] Add release documentation (#814) ## Summary - describe semantic versioning and release steps - add release page to documentation nav ## Testing - `make format` - `make lint` - `make mypy` - `make tests` - `make build-docs` ------ https://chatgpt.com/codex/tasks/task_i_68409d25afdc83218ad362d10c8a80a1 --- docs/release.md | 18 ++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 19 insertions(+) create mode 100644 docs/release.md diff --git a/docs/release.md b/docs/release.md new file mode 100644 index 000000000..3bfc6e9b8 --- /dev/null +++ b/docs/release.md @@ -0,0 +1,18 @@ +# Release process + +The project follows a slightly modified version of semantic versioning using the form `0.Y.Z`. The leading `0` indicates the SDK is still evolving rapidly. Increment the components as follows: + +## Minor (`Y`) versions + +We will increase minor versions `Y` for **breaking changes** to any public interfaces that are not marked as beta. For example, going from `0.0.x` to `0.1.x` might include breaking changes. + +If you don't want breaking changes, we recommend pinning to `0.0.x` versions in your project. + +## Patch (`Z`) versions + +We will increment `Z` for non-breaking changes: + +- Bug fixes +- New features +- Changes to private interfaces +- Updates to beta features diff --git a/mkdocs.yml b/mkdocs.yml index a58258a93..b79e6454f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -72,6 +72,7 @@ plugins: - models/litellm.md - config.md - visualization.md + - release.md - Voice agents: - voice/quickstart.md - voice/pipeline.md From c98e234845949865a90e3a0338580e49e9b4b75b Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 9 Jun 2025 07:13:43 -0700 Subject: [PATCH 04/11] Fix handoff transfer message JSON (#818) ## Summary - ensure `Handoff.get_transfer_message` emits valid JSON - test transfer message validity ## Testing - `make format` - `make lint` - `make mypy` - `make tests` ------ https://chatgpt.com/codex/tasks/task_i_68432f925b048324a16878d28e850841 --- src/agents/handoffs.py | 4 ++-- tests/test_handoff_tool.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/agents/handoffs.py b/src/agents/handoffs.py index 50dd417a2..76c93a298 100644 --- a/src/agents/handoffs.py +++ b/src/agents/handoffs.py @@ -1,6 +1,7 @@ from __future__ import annotations import inspect +import json from collections.abc import Awaitable from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Callable, Generic, cast, overload @@ -99,8 +100,7 @@ class Handoff(Generic[TContext]): """ def get_transfer_message(self, agent: Agent[Any]) -> str: - base = f"{{'assistant': '{agent.name}'}}" - return base + return json.dumps({"assistant": agent.name}) @classmethod def default_tool_name(cls, agent: Agent[Any]) -> str: diff --git a/tests/test_handoff_tool.py b/tests/test_handoff_tool.py index a2a06208f..0a8f064f1 100644 --- a/tests/test_handoff_tool.py +++ b/tests/test_handoff_tool.py @@ -1,3 +1,4 @@ +import json from typing import Any import pytest @@ -276,3 +277,10 @@ def test_handoff_input_schema_is_strict(): "additionalProperties" in obj.input_json_schema and not obj.input_json_schema["additionalProperties"] ), "Input schema should be strict and have 
additionalProperties=False" + + +def test_get_transfer_message_is_valid_json() -> None: + agent = Agent(name="foo") + obj = handoff(agent) + transfer = obj.get_transfer_message(agent) + assert json.loads(transfer) == {"assistant": agent.name} From dcb88e69cd8a0a82894d212dd5ebee118f99057b Mon Sep 17 00:00:00 2001 From: Javier Leguina <92868166+jleguina@users.noreply.github.com> Date: Mon, 9 Jun 2025 15:24:51 +0100 Subject: [PATCH 05/11] docs: custom output extraction (#817) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In deep agent workflows, each sub‐agent automatically performs an LLM step to summarize its tool calls before returning to its parent. This leads to: 1. Excessive latency: every nested agent invokes the LLM, compounding delays. 2. Loss of raw tool data: summaries may strip out details the top‐level agent needs. We discovered that `Agent.as_tool(...)` already accepts an (undocumented) `custom_output_extractor` parameter. By providing a callback, a parent agent can override what the sub‐agent returns e.g. hand back raw tool outputs or a custom slice so that only the final agent does summarization. --- This PR adds a “Custom output extraction” section to the Markdown docs under “Agents as tools,” with a minimal code example. --- docs/tools.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/tools.md b/docs/tools.md index 4e9a20d32..6dba1a853 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -284,6 +284,33 @@ async def run_my_agent() -> str: return str(result.final_output) ``` +### Custom output extraction + +In certain cases, you might want to modify the output of the tool-agents before returning it to the central agent. This may be useful if you want to: + +- Extract a specific piece of information (e.g., a JSON payload) from the sub-agent's chat history. +- Convert or reformat the agent’s final answer (e.g., transform Markdown into plain text or CSV). +- Validate the output or provide a fallback value when the agent’s response is missing or malformed. + +You can do this by supplying the `custom_output_extractor` argument to the `as_tool` method: + +```python +async def extract_json_payload(run_result: RunResult) -> str: + # Scan the agent’s outputs in reverse order until we find a JSON-like message from a tool call. + for item in reversed(run_result.new_items): + if isinstance(item, ToolCallOutputItem) and item.output.strip().startswith("{"): + return item.output.strip() + # Fallback to an empty JSON object if nothing was found + return "{}" + + +json_tool = data_agent.as_tool( + tool_name="get_data_json", + tool_description="Run the data agent and return only its JSON payload", + custom_output_extractor=extract_json_payload, +) +``` + ## Handling errors in function tools When you create a function tool via `@function_tool`, you can pass a `failure_error_function`. This is a function that provides an error response to the LLM in case the tool call crashes. From 8dfd6ff35c1b73bc5d2e6a14c0246b03aa2f9f98 Mon Sep 17 00:00:00 2001 From: Niv Hertz <46317590+niv-hertz@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:08:50 +0300 Subject: [PATCH 06/11] Added support for passing tool_call_id via the RunContextWrapper (#766) This PR fixes issue: https://github.com/openai/openai-agents-python/issues/559 By adding the tool_call_id to the RunContextWrapper prior to calling tools. This gives the ability to access the tool_call_id in the implementation of the tool. 
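For illustration, here is a minimal sketch of a tool reading the call ID from its context (the tool name and body are hypothetical, not part of this PR):

```python
from agents import function_tool
from agents.tool_context import ToolContext


@function_tool
def log_action(ctx: ToolContext[None], action: str) -> str:
    # The context passed to the tool is now a ToolContext, so the ID of the
    # specific tool call that triggered this invocation is available.
    return f"{action} (tool_call_id={ctx.tool_call_id})"
```

Existing tools that annotate their first parameter as `RunContextWrapper` keep working, since `ToolContext` subclasses it.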
--- src/agents/_run_impl.py | 12 +++++---- src/agents/function_schema.py | 9 ++++--- src/agents/tool.py | 14 +++++++--- src/agents/tool_context.py | 28 +++++++++++++++++++ tests/test_function_tool.py | 33 ++++++++++++----------- tests/test_function_tool_decorator.py | 9 ++++--- tests/test_responses.py | 6 +++-- tests/test_run_step_execution.py | 39 +++++++++++++++++++++++++++ 8 files changed, 115 insertions(+), 35 deletions(-) create mode 100644 src/agents/tool_context.py diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index e98010555..a75c5e825 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -75,6 +75,7 @@ MCPToolApprovalRequest, Tool, ) +from .tool_context import ToolContext from .tracing import ( SpanError, Trace, @@ -543,23 +544,24 @@ async def run_single_tool( func_tool: FunctionTool, tool_call: ResponseFunctionToolCall ) -> Any: with function_span(func_tool.name) as span_fn: + tool_context = ToolContext.from_agent_context(context_wrapper, tool_call.call_id) if config.trace_include_sensitive_data: span_fn.span_data.input = tool_call.arguments try: _, _, result = await asyncio.gather( - hooks.on_tool_start(context_wrapper, agent, func_tool), + hooks.on_tool_start(tool_context, agent, func_tool), ( - agent.hooks.on_tool_start(context_wrapper, agent, func_tool) + agent.hooks.on_tool_start(tool_context, agent, func_tool) if agent.hooks else _coro.noop_coroutine() ), - func_tool.on_invoke_tool(context_wrapper, tool_call.arguments), + func_tool.on_invoke_tool(tool_context, tool_call.arguments), ) await asyncio.gather( - hooks.on_tool_end(context_wrapper, agent, func_tool, result), + hooks.on_tool_end(tool_context, agent, func_tool, result), ( - agent.hooks.on_tool_end(context_wrapper, agent, func_tool, result) + agent.hooks.on_tool_end(tool_context, agent, func_tool, result) if agent.hooks else _coro.noop_coroutine() ), diff --git a/src/agents/function_schema.py b/src/agents/function_schema.py index 0e5868965..188566970 100644 --- a/src/agents/function_schema.py +++ b/src/agents/function_schema.py @@ -13,6 +13,7 @@ from .exceptions import UserError from .run_context import RunContextWrapper from .strict_schema import ensure_strict_json_schema +from .tool_context import ToolContext @dataclass @@ -237,21 +238,21 @@ def function_schema( ann = type_hints.get(first_name, first_param.annotation) if ann != inspect._empty: origin = get_origin(ann) or ann - if origin is RunContextWrapper: + if origin is RunContextWrapper or origin is ToolContext: takes_context = True # Mark that the function takes context else: filtered_params.append((first_name, first_param)) else: filtered_params.append((first_name, first_param)) - # For parameters other than the first, raise error if any use RunContextWrapper. + # For parameters other than the first, raise error if any use RunContextWrapper or ToolContext. 
for name, param in params[1:]: ann = type_hints.get(name, param.annotation) if ann != inspect._empty: origin = get_origin(ann) or ann - if origin is RunContextWrapper: + if origin is RunContextWrapper or origin is ToolContext: raise UserError( - f"RunContextWrapper param found at non-first position in function" + f"RunContextWrapper/ToolContext param found at non-first position in function" f" {func.__name__}" ) filtered_params.append((name, param)) diff --git a/src/agents/tool.py b/src/agents/tool.py index 57272f9f4..ce66a53ba 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -20,6 +20,7 @@ from .items import RunItem from .logger import logger from .run_context import RunContextWrapper +from .tool_context import ToolContext from .tracing import SpanError from .util import _error_tracing from .util._types import MaybeAwaitable @@ -31,8 +32,13 @@ ToolFunctionWithoutContext = Callable[ToolParams, Any] ToolFunctionWithContext = Callable[Concatenate[RunContextWrapper[Any], ToolParams], Any] +ToolFunctionWithToolContext = Callable[Concatenate[ToolContext, ToolParams], Any] -ToolFunction = Union[ToolFunctionWithoutContext[ToolParams], ToolFunctionWithContext[ToolParams]] +ToolFunction = Union[ + ToolFunctionWithoutContext[ToolParams], + ToolFunctionWithContext[ToolParams], + ToolFunctionWithToolContext[ToolParams], +] @dataclass @@ -62,7 +68,7 @@ class FunctionTool: params_json_schema: dict[str, Any] """The JSON schema for the tool's parameters.""" - on_invoke_tool: Callable[[RunContextWrapper[Any], str], Awaitable[Any]] + on_invoke_tool: Callable[[ToolContext[Any], str], Awaitable[Any]] """A function that invokes the tool with the given context and parameters. The params passed are: 1. The tool run context. @@ -344,7 +350,7 @@ def _create_function_tool(the_func: ToolFunction[...]) -> FunctionTool: strict_json_schema=strict_mode, ) - async def _on_invoke_tool_impl(ctx: RunContextWrapper[Any], input: str) -> Any: + async def _on_invoke_tool_impl(ctx: ToolContext[Any], input: str) -> Any: try: json_data: dict[str, Any] = json.loads(input) if input else {} except Exception as e: @@ -393,7 +399,7 @@ async def _on_invoke_tool_impl(ctx: RunContextWrapper[Any], input: str) -> Any: return result - async def _on_invoke_tool(ctx: RunContextWrapper[Any], input: str) -> Any: + async def _on_invoke_tool(ctx: ToolContext[Any], input: str) -> Any: try: return await _on_invoke_tool_impl(ctx, input) except Exception as e: diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py new file mode 100644 index 000000000..17b595f06 --- /dev/null +++ b/src/agents/tool_context.py @@ -0,0 +1,28 @@ +from dataclasses import dataclass, field, fields +from typing import Any + +from .run_context import RunContextWrapper, TContext + + +def _assert_must_pass_tool_call_id() -> str: + raise ValueError("tool_call_id must be passed to ToolContext") + +@dataclass +class ToolContext(RunContextWrapper[TContext]): + """The context of a tool call.""" + + tool_call_id: str = field(default_factory=_assert_must_pass_tool_call_id) + """The ID of the tool call.""" + + @classmethod + def from_agent_context( + cls, context: RunContextWrapper[TContext], tool_call_id: str + ) -> "ToolContext": + """ + Create a ToolContext from a RunContextWrapper. 
+ """ + # Grab the names of the RunContextWrapper's init=True fields + base_values: dict[str, Any] = { + f.name: getattr(context, f.name) for f in fields(RunContextWrapper) if f.init + } + return cls(tool_call_id=tool_call_id, **base_values) diff --git a/tests/test_function_tool.py b/tests/test_function_tool.py index c5d7da649..b232bf75e 100644 --- a/tests/test_function_tool.py +++ b/tests/test_function_tool.py @@ -7,6 +7,7 @@ from agents import Agent, FunctionTool, ModelBehaviorError, RunContextWrapper, function_tool from agents.tool import default_tool_error_function +from agents.tool_context import ToolContext def argless_function() -> str: @@ -18,11 +19,11 @@ async def test_argless_function(): tool = function_tool(argless_function) assert tool.name == "argless_function" - result = await tool.on_invoke_tool(RunContextWrapper(None), "") + result = await tool.on_invoke_tool(ToolContext(context=None, tool_call_id="1"), "") assert result == "ok" -def argless_with_context(ctx: RunContextWrapper[str]) -> str: +def argless_with_context(ctx: ToolContext[str]) -> str: return "ok" @@ -31,11 +32,11 @@ async def test_argless_with_context(): tool = function_tool(argless_with_context) assert tool.name == "argless_with_context" - result = await tool.on_invoke_tool(RunContextWrapper(None), "") + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), "") assert result == "ok" # Extra JSON should not raise an error - result = await tool.on_invoke_tool(RunContextWrapper(None), '{"a": 1}') + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), '{"a": 1}') assert result == "ok" @@ -48,15 +49,15 @@ async def test_simple_function(): tool = function_tool(simple_function, failure_error_function=None) assert tool.name == "simple_function" - result = await tool.on_invoke_tool(RunContextWrapper(None), '{"a": 1}') + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), '{"a": 1}') assert result == 6 - result = await tool.on_invoke_tool(RunContextWrapper(None), '{"a": 1, "b": 2}') + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), '{"a": 1, "b": 2}') assert result == 3 # Missing required argument should raise an error with pytest.raises(ModelBehaviorError): - await tool.on_invoke_tool(RunContextWrapper(None), "") + await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), "") class Foo(BaseModel): @@ -84,7 +85,7 @@ async def test_complex_args_function(): "bar": Bar(x="hello", y=10), } ) - result = await tool.on_invoke_tool(RunContextWrapper(None), valid_json) + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), valid_json) assert result == "6 hello10 hello" valid_json = json.dumps( @@ -93,7 +94,7 @@ async def test_complex_args_function(): "bar": Bar(x="hello", y=10), } ) - result = await tool.on_invoke_tool(RunContextWrapper(None), valid_json) + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), valid_json) assert result == "3 hello10 hello" valid_json = json.dumps( @@ -103,12 +104,12 @@ async def test_complex_args_function(): "baz": "world", } ) - result = await tool.on_invoke_tool(RunContextWrapper(None), valid_json) + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), valid_json) assert result == "3 hello10 world" # Missing required argument should raise an error with pytest.raises(ModelBehaviorError): - await tool.on_invoke_tool(RunContextWrapper(None), '{"foo": {"a": 1}}') + await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), '{"foo": {"a": 1}}') def 
test_function_config_overrides(): @@ -168,7 +169,7 @@ async def run_function(ctx: RunContextWrapper[Any], args: str) -> str: assert tool.params_json_schema[key] == value assert tool.strict_json_schema - result = await tool.on_invoke_tool(RunContextWrapper(None), '{"data": "hello"}') + result = await tool.on_invoke_tool(ToolContext(None, tool_call_id="1"), '{"data": "hello"}') assert result == "hello_done" tool_not_strict = FunctionTool( @@ -183,7 +184,7 @@ async def run_function(ctx: RunContextWrapper[Any], args: str) -> str: assert "additionalProperties" not in tool_not_strict.params_json_schema result = await tool_not_strict.on_invoke_tool( - RunContextWrapper(None), '{"data": "hello", "bar": "baz"}' + ToolContext(None, tool_call_id="1"), '{"data": "hello", "bar": "baz"}' ) assert result == "hello_done" @@ -194,7 +195,7 @@ def my_func(a: int, b: int = 5): raise ValueError("test") tool = function_tool(my_func) - ctx = RunContextWrapper(None) + ctx = ToolContext(None, tool_call_id="1") result = await tool.on_invoke_tool(ctx, "") assert "Invalid JSON" in str(result) @@ -218,7 +219,7 @@ def custom_sync_error_function(ctx: RunContextWrapper[Any], error: Exception) -> return f"error_{error.__class__.__name__}" tool = function_tool(my_func, failure_error_function=custom_sync_error_function) - ctx = RunContextWrapper(None) + ctx = ToolContext(None, tool_call_id="1") result = await tool.on_invoke_tool(ctx, "") assert result == "error_ModelBehaviorError" @@ -242,7 +243,7 @@ def custom_sync_error_function(ctx: RunContextWrapper[Any], error: Exception) -> return f"error_{error.__class__.__name__}" tool = function_tool(my_func, failure_error_function=custom_sync_error_function) - ctx = RunContextWrapper(None) + ctx = ToolContext(None, tool_call_id="1") result = await tool.on_invoke_tool(ctx, "") assert result == "error_ModelBehaviorError" diff --git a/tests/test_function_tool_decorator.py b/tests/test_function_tool_decorator.py index 3b52788fb..d334d8f84 100644 --- a/tests/test_function_tool_decorator.py +++ b/tests/test_function_tool_decorator.py @@ -7,6 +7,7 @@ from agents import function_tool from agents.run_context import RunContextWrapper +from agents.tool_context import ToolContext class DummyContext: @@ -14,8 +15,8 @@ def __init__(self): self.data = "something" -def ctx_wrapper() -> RunContextWrapper[DummyContext]: - return RunContextWrapper(DummyContext()) +def ctx_wrapper() -> ToolContext[DummyContext]: + return ToolContext(context=DummyContext(), tool_call_id="1") @function_tool @@ -44,7 +45,7 @@ async def test_sync_no_context_with_args_invocation(): @function_tool -def sync_with_context(ctx: RunContextWrapper[DummyContext], name: str) -> str: +def sync_with_context(ctx: ToolContext[DummyContext], name: str) -> str: return f"{name}_{ctx.context.data}" @@ -71,7 +72,7 @@ async def test_async_no_context_invocation(): @function_tool -async def async_with_context(ctx: RunContextWrapper[DummyContext], prefix: str, num: int) -> str: +async def async_with_context(ctx: ToolContext[DummyContext], prefix: str, num: int) -> str: await asyncio.sleep(0) return f"{prefix}-{num}-{ctx.context.data}" diff --git a/tests/test_responses.py b/tests/test_responses.py index 6b91bf8c6..74212f61b 100644 --- a/tests/test_responses.py +++ b/tests/test_responses.py @@ -49,10 +49,12 @@ def _foo() -> str: ) -def get_function_tool_call(name: str, arguments: str | None = None) -> ResponseOutputItem: +def get_function_tool_call( + name: str, arguments: str | None = None, call_id: str | None = None +) -> ResponseOutputItem: 
return ResponseFunctionToolCall( id="1", - call_id="2", + call_id=call_id or "2", type="function_call", name=name, arguments=arguments or "", diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index b4c83d015..6f446649a 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from typing import Any import pytest @@ -26,6 +27,8 @@ RunImpl, SingleStepResult, ) +from agents.tool import function_tool +from agents.tool_context import ToolContext from .test_responses import ( get_final_output_message, @@ -158,6 +161,42 @@ async def test_multiple_tool_calls(): assert isinstance(result.next_step, NextStepRunAgain) +@pytest.mark.asyncio +async def test_multiple_tool_calls_with_tool_context(): + async def _fake_tool(context: ToolContext[str], value: str) -> str: + return f"{value}-{context.tool_call_id}" + + tool = function_tool(_fake_tool, name_override="fake_tool", failure_error_function=None) + + agent = Agent( + name="test", + tools=[tool], + ) + response = ModelResponse( + output=[ + get_function_tool_call("fake_tool", json.dumps({"value": "123"}), call_id="1"), + get_function_tool_call("fake_tool", json.dumps({"value": "456"}), call_id="2"), + ], + usage=Usage(), + response_id=None, + ) + + result = await get_execute_result(agent, response) + assert result.original_input == "hello" + + # 4 items: new message, 2 tool calls, 2 tool call outputs + assert len(result.generated_items) == 4 + assert isinstance(result.next_step, NextStepRunAgain) + + items = result.generated_items + assert_item_is_function_tool_call(items[0], "fake_tool", json.dumps({"value": "123"})) + assert_item_is_function_tool_call(items[1], "fake_tool", json.dumps({"value": "456"})) + assert_item_is_function_tool_call_output(items[2], "123-1") + assert_item_is_function_tool_call_output(items[3], "456-2") + + assert isinstance(result.next_step, NextStepRunAgain) + + @pytest.mark.asyncio async def test_handoff_output_leads_to_handoff_next_step(): agent_1 = Agent(name="test_1") From 0eee6b8305e52aa4de7cae74db4c22633019522a Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Tue, 10 Jun 2025 18:14:34 -0400 Subject: [PATCH 07/11] Allow arbitrary kwargs in model (#842) Sometimes users want to provide parameters specific to a model provider. This is an escape hatch. 
--- src/agents/extensions/models/litellm_model.py | 4 + src/agents/model_settings.py | 15 ++ src/agents/models/openai_chatcompletions.py | 1 + src/agents/models/openai_responses.py | 1 + tests/model_settings/test_serialization.py | 74 ++++++++ tests/models/test_kwargs_functionality.py | 177 ++++++++++++++++++ 6 files changed, 272 insertions(+) create mode 100644 tests/models/test_kwargs_functionality.py diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py index 7cf7a2de5..3ded80be2 100644 --- a/src/agents/extensions/models/litellm_model.py +++ b/src/agents/extensions/models/litellm_model.py @@ -284,6 +284,10 @@ async def _fetch_response( if model_settings.extra_body and isinstance(model_settings.extra_body, dict): extra_kwargs.update(model_settings.extra_body) + # Add kwargs from model_settings.extra_args, filtering out None values + if model_settings.extra_args: + extra_kwargs.update(model_settings.extra_args) + ret = await litellm.acompletion( model=self.model, messages=converted_messages, diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py index 7b016c98f..881390279 100644 --- a/src/agents/model_settings.py +++ b/src/agents/model_settings.py @@ -73,6 +73,11 @@ class ModelSettings: """Additional headers to provide with the request. Defaults to None if not provided.""" + extra_args: dict[str, Any] | None = None + """Arbitrary keyword arguments to pass to the model API call. + These will be passed directly to the underlying model provider's API. + Use with caution as not all models support all parameters.""" + def resolve(self, override: ModelSettings | None) -> ModelSettings: """Produce a new ModelSettings by overlaying any non-None values from the override on top of this instance.""" @@ -84,6 +89,16 @@ def resolve(self, override: ModelSettings | None) -> ModelSettings: for field in fields(self) if getattr(override, field.name) is not None } + + # Handle extra_args merging specially - merge dictionaries instead of replacing + if self.extra_args is not None or override.extra_args is not None: + merged_args = {} + if self.extra_args: + merged_args.update(self.extra_args) + if override.extra_args: + merged_args.update(override.extra_args) + changes["extra_args"] = merged_args if merged_args else None + return replace(self, **changes) def to_json_dict(self) -> dict[str, Any]: diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py index 6b4045d21..4dad976bd 100644 --- a/src/agents/models/openai_chatcompletions.py +++ b/src/agents/models/openai_chatcompletions.py @@ -281,6 +281,7 @@ async def _fetch_response( extra_query=model_settings.extra_query, extra_body=model_settings.extra_body, metadata=self._non_null_or_not_given(model_settings.metadata), + **(model_settings.extra_args or {}), ) if isinstance(ret, ChatCompletion): diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index 86c8e69cb..fdc391c6a 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -266,6 +266,7 @@ async def _fetch_response( store=self._non_null_or_not_given(model_settings.store), reasoning=self._non_null_or_not_given(model_settings.reasoning), metadata=self._non_null_or_not_given(model_settings.metadata), + **(model_settings.extra_args or {}), ) def _get_client(self) -> AsyncOpenAI: diff --git a/tests/model_settings/test_serialization.py b/tests/model_settings/test_serialization.py index d76a58d17..ad4db4019 100644 --- 
a/tests/model_settings/test_serialization.py +++ b/tests/model_settings/test_serialization.py @@ -47,6 +47,7 @@ def test_all_fields_serialization() -> None: extra_query={"foo": "bar"}, extra_body={"foo": "bar"}, extra_headers={"foo": "bar"}, + extra_args={"custom_param": "value", "another_param": 42}, ) # Verify that every single field is set to a non-None value @@ -57,3 +58,76 @@ def test_all_fields_serialization() -> None: # Now, lets serialize the ModelSettings instance to a JSON string verify_serialization(model_settings) + + +def test_extra_args_serialization() -> None: + """Test that extra_args are properly serialized.""" + model_settings = ModelSettings( + temperature=0.5, + extra_args={"custom_param": "value", "another_param": 42, "nested": {"key": "value"}}, + ) + + json_dict = model_settings.to_json_dict() + assert json_dict["extra_args"] == { + "custom_param": "value", + "another_param": 42, + "nested": {"key": "value"}, + } + + # Verify serialization works + verify_serialization(model_settings) + + +def test_extra_args_resolve() -> None: + """Test that extra_args are properly merged in the resolve method.""" + base_settings = ModelSettings( + temperature=0.5, extra_args={"param1": "base_value", "param2": "base_only"} + ) + + override_settings = ModelSettings( + top_p=0.9, extra_args={"param1": "override_value", "param3": "override_only"} + ) + + resolved = base_settings.resolve(override_settings) + + # Check that regular fields are properly resolved + assert resolved.temperature == 0.5 # from base + assert resolved.top_p == 0.9 # from override + + # Check that extra_args are properly merged + expected_extra_args = { + "param1": "override_value", # override wins + "param2": "base_only", # from base + "param3": "override_only", # from override + } + assert resolved.extra_args == expected_extra_args + + +def test_extra_args_resolve_with_none() -> None: + """Test that resolve works properly when one side has None extra_args.""" + # Base with extra_args, override with None + base_settings = ModelSettings(extra_args={"param1": "value1"}) + override_settings = ModelSettings(temperature=0.8) + + resolved = base_settings.resolve(override_settings) + assert resolved.extra_args == {"param1": "value1"} + assert resolved.temperature == 0.8 + + # Base with None, override with extra_args + base_settings = ModelSettings(temperature=0.5) + override_settings = ModelSettings(extra_args={"param2": "value2"}) + + resolved = base_settings.resolve(override_settings) + assert resolved.extra_args == {"param2": "value2"} + assert resolved.temperature == 0.5 + + +def test_extra_args_resolve_both_none() -> None: + """Test that resolve works when both sides have None extra_args.""" + base_settings = ModelSettings(temperature=0.5) + override_settings = ModelSettings(top_p=0.9) + + resolved = base_settings.resolve(override_settings) + assert resolved.extra_args is None + assert resolved.temperature == 0.5 + assert resolved.top_p == 0.9 diff --git a/tests/models/test_kwargs_functionality.py b/tests/models/test_kwargs_functionality.py new file mode 100644 index 000000000..210610a02 --- /dev/null +++ b/tests/models/test_kwargs_functionality.py @@ -0,0 +1,177 @@ +import litellm +import pytest +from litellm.types.utils import Choices, Message, ModelResponse, Usage +from openai.types.chat.chat_completion import ChatCompletion, Choice +from openai.types.chat.chat_completion_message import ChatCompletionMessage +from openai.types.completion_usage import CompletionUsage + +from agents.extensions.models.litellm_model 
import LitellmModel +from agents.model_settings import ModelSettings +from agents.models.interface import ModelTracing +from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_litellm_kwargs_forwarded(monkeypatch): + """ + Test that kwargs from ModelSettings are forwarded to litellm.acompletion. + """ + captured: dict[str, object] = {} + + async def fake_acompletion(model, messages=None, **kwargs): + captured.update(kwargs) + msg = Message(role="assistant", content="test response") + choice = Choices(index=0, message=msg) + return ModelResponse(choices=[choice], usage=Usage(0, 0, 0)) + + monkeypatch.setattr(litellm, "acompletion", fake_acompletion) + + settings = ModelSettings( + temperature=0.5, + extra_args={ + "custom_param": "custom_value", + "seed": 42, + "stop": ["END"], + "logit_bias": {123: -100}, + }, + ) + model = LitellmModel(model="test-model") + + await model.get_response( + system_instructions=None, + input="test input", + model_settings=settings, + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + # Verify that all kwargs were passed through + assert captured["custom_param"] == "custom_value" + assert captured["seed"] == 42 + assert captured["stop"] == ["END"] + assert captured["logit_bias"] == {123: -100} + + # Verify regular parameters are still passed + assert captured["temperature"] == 0.5 + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_openai_chatcompletions_kwargs_forwarded(monkeypatch): + """ + Test that kwargs from ModelSettings are forwarded to OpenAI chat completions API. + """ + captured: dict[str, object] = {} + + class MockChatCompletions: + async def create(self, **kwargs): + captured.update(kwargs) + msg = ChatCompletionMessage(role="assistant", content="test response") + choice = Choice(index=0, message=msg, finish_reason="stop") + return ChatCompletion( + id="test-id", + created=0, + model="gpt-4", + object="chat.completion", + choices=[choice], + usage=CompletionUsage(completion_tokens=5, prompt_tokens=10, total_tokens=15), + ) + + class MockChat: + def __init__(self): + self.completions = MockChatCompletions() + + class MockClient: + def __init__(self): + self.chat = MockChat() + self.base_url = "https://api.openai.com/v1" + + settings = ModelSettings( + temperature=0.7, + extra_args={ + "seed": 123, + "logit_bias": {456: 10}, + "stop": ["STOP", "END"], + "user": "test-user", + }, + ) + + mock_client = MockClient() + model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=mock_client) # type: ignore + + await model.get_response( + system_instructions="Test system", + input="test input", + model_settings=settings, + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + # Verify that all kwargs were passed through + assert captured["seed"] == 123 + assert captured["logit_bias"] == {456: 10} + assert captured["stop"] == ["STOP", "END"] + assert captured["user"] == "test-user" + + # Verify regular parameters are still passed + assert captured["temperature"] == 0.7 + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_empty_kwargs_handling(monkeypatch): + """ + Test that empty or None kwargs are handled gracefully. 
+ """ + captured: dict[str, object] = {} + + async def fake_acompletion(model, messages=None, **kwargs): + captured.update(kwargs) + msg = Message(role="assistant", content="test response") + choice = Choices(index=0, message=msg) + return ModelResponse(choices=[choice], usage=Usage(0, 0, 0)) + + monkeypatch.setattr(litellm, "acompletion", fake_acompletion) + + # Test with None kwargs + settings_none = ModelSettings(temperature=0.5, extra_args=None) + model = LitellmModel(model="test-model") + + await model.get_response( + system_instructions=None, + input="test input", + model_settings=settings_none, + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + # Should work without error and include regular parameters + assert captured["temperature"] == 0.5 + + # Test with empty dict + captured.clear() + settings_empty = ModelSettings(temperature=0.3, extra_args={}) + + await model.get_response( + system_instructions=None, + input="test input", + model_settings=settings_empty, + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + # Should work without error and include regular parameters + assert captured["temperature"] == 0.3 From 6d2806f10c59df6753a23000709ad807ba62aca1 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Mon, 16 Jun 2025 10:50:20 -0400 Subject: [PATCH 08/11] Fix function_schema name override bug (#872) ## Summary - ensure `name_override` is always used in `function_schema` - test name override when docstring info is disabled ## Testing - `make format` - `make lint` - `make mypy` - `make tests` Resolves #860 ------ https://chatgpt.com/codex/tasks/task_i_684f1cf885b08321b4dd3f4294e24ca2 --- src/agents/function_schema.py | 3 ++- tests/test_function_schema.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/agents/function_schema.py b/src/agents/function_schema.py index 188566970..dd1db1fee 100644 --- a/src/agents/function_schema.py +++ b/src/agents/function_schema.py @@ -223,7 +223,8 @@ def function_schema( doc_info = None param_descs = {} - func_name = name_override or doc_info.name if doc_info else func.__name__ + # Ensure name_override takes precedence even if docstring info is disabled. + func_name = name_override or (doc_info.name if doc_info else func.__name__) # 2. Inspect function signature and get type hints sig = inspect.signature(func) diff --git a/tests/test_function_schema.py b/tests/test_function_schema.py index 5618d8ae9..54e7e2f9e 100644 --- a/tests/test_function_schema.py +++ b/tests/test_function_schema.py @@ -439,3 +439,15 @@ def func_with_mapping(test_one: Mapping[str, int]) -> str: with pytest.raises(UserError): function_schema(func_with_mapping) + + +def test_name_override_without_docstring() -> None: + """name_override should be used even when not parsing docstrings.""" + + def foo(x: int) -> int: + return x + + fs = function_schema(foo, use_docstring_info=False, name_override="custom") + + assert fs.name == "custom" + assert fs.params_json_schema.get("title") == "custom_args" From 281a7b2bb61cb20346e75234fbfaa2bf4ab808cc Mon Sep 17 00:00:00 2001 From: Daniele Morotti <58258368+DanieleMorotti@users.noreply.github.com> Date: Mon, 16 Jun 2025 17:35:09 +0200 Subject: [PATCH 09/11] adopted float instead of timedelta for timeout parameters (#874) I replaced the `timedelta` parameters for MCP timeouts with `float` values, addressing issue #845 . 
Given that the MCP official repository has incorporated these changes in [this PR](https://github.com/modelcontextprotocol/python-sdk/pull/941), updating the MCP version in openai-agents and specifying the timeouts as floats should be enough. --- pyproject.toml | 2 +- src/agents/mcp/server.py | 8 ++++---- uv.lock | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b59073ed8..a674dac96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "typing-extensions>=4.12.2, <5", "requests>=2.0, <3", "types-requests>=2.0, <3", - "mcp>=1.8.0, <2; python_version >= '3.10'", + "mcp>=1.9.4, <2; python_version >= '3.10'", ] classifiers = [ "Typing :: Typed", diff --git a/src/agents/mcp/server.py b/src/agents/mcp/server.py index b012a0968..3d1e17790 100644 --- a/src/agents/mcp/server.py +++ b/src/agents/mcp/server.py @@ -340,10 +340,10 @@ class MCPServerStreamableHttpParams(TypedDict): headers: NotRequired[dict[str, str]] """The headers to send to the server.""" - timeout: NotRequired[timedelta] + timeout: NotRequired[timedelta | float] """The timeout for the HTTP request. Defaults to 5 seconds.""" - sse_read_timeout: NotRequired[timedelta] + sse_read_timeout: NotRequired[timedelta | float] """The timeout for the SSE connection, in seconds. Defaults to 5 minutes.""" terminate_on_close: NotRequired[bool] @@ -401,8 +401,8 @@ def create_streams( return streamablehttp_client( url=self.params["url"], headers=self.params.get("headers", None), - timeout=self.params.get("timeout", timedelta(seconds=30)), - sse_read_timeout=self.params.get("sse_read_timeout", timedelta(seconds=60 * 5)), + timeout=self.params.get("timeout", 5), + sse_read_timeout=self.params.get("sse_read_timeout", 60 * 5), terminate_on_close=self.params.get("terminate_on_close", True), ) diff --git a/uv.lock b/uv.lock index 1779b8637..56f56f413 100644 --- a/uv.lock +++ b/uv.lock @@ -1047,7 +1047,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.8.1" +version = "1.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "python_full_version >= '3.10'" }, @@ -1060,9 +1060,9 @@ dependencies = [ { name = "starlette", marker = "python_full_version >= '3.10'" }, { name = "uvicorn", marker = "python_full_version >= '3.10' and sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7c/13/16b712e8a3be6a736b411df2fc6b4e75eb1d3e99b1cd57a3a1decf17f612/mcp-1.8.1.tar.gz", hash = "sha256:ec0646271d93749f784d2316fb5fe6102fb0d1be788ec70a9e2517e8f2722c0e", size = 265605 } +sdist = { url = "https://files.pythonhosted.org/packages/06/f2/dc2450e566eeccf92d89a00c3e813234ad58e2ba1e31d11467a09ac4f3b9/mcp-1.9.4.tar.gz", hash = "sha256:cfb0bcd1a9535b42edaef89947b9e18a8feb49362e1cc059d6e7fc636f2cb09f", size = 333294 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/5d/91cf0d40e40ae9ecf8d4004e0f9611eea86085aa0b5505493e0ff53972da/mcp-1.8.1-py3-none-any.whl", hash = "sha256:948e03783859fa35abe05b9b6c0a1d5519be452fc079dc8d7f682549591c1770", size = 119761 }, + { url = "https://files.pythonhosted.org/packages/97/fc/80e655c955137393c443842ffcc4feccab5b12fa7cb8de9ced90f90e6998/mcp-1.9.4-py3-none-any.whl", hash = "sha256:7fcf36b62936adb8e63f89346bccca1268eeca9bf6dfb562ee10b1dfbda9dac0", size = 130232 }, ] [[package]] @@ -1534,7 +1534,7 @@ requires-dist = [ { name = "graphviz", marker = "extra == 'viz'", specifier = ">=0.17" }, { name = "griffe", specifier = ">=1.5.6,<2" }, { name = "litellm", 
marker = "extra == 'litellm'", specifier = ">=1.67.4.post1,<2" }, - { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.8.0,<2" }, + { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.9.4,<2" }, { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" }, { name = "openai", specifier = ">=1.81.0" }, { name = "pydantic", specifier = ">=2.10,<3" }, From 2b9b8f7e7363490c2d38d319cd1acaa288595f9a Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Mon, 16 Jun 2025 15:47:48 -0400 Subject: [PATCH 10/11] Prompts support (#876) Add support for the new openai prompts feature. --- examples/basic/prompt_template.py | 79 +++++++++++++++ pyproject.toml | 2 +- src/agents/__init__.py | 4 + src/agents/agent.py | 14 +++ src/agents/extensions/models/litellm_model.py | 8 +- src/agents/models/interface.py | 6 ++ src/agents/models/openai_chatcompletions.py | 9 +- src/agents/models/openai_responses.py | 9 ++ src/agents/prompts.py | 76 +++++++++++++++ src/agents/run.py | 17 +++- src/agents/tool_context.py | 1 + tests/fake_model.py | 2 + .../test_litellm_chatcompletions_stream.py | 3 + tests/test_agent_prompt.py | 97 +++++++++++++++++++ tests/test_openai_chatcompletions.py | 4 + tests/test_openai_chatcompletions_stream.py | 3 + tests/test_responses_tracing.py | 6 ++ tests/voice/test_workflow.py | 3 + uv.lock | 8 +- 19 files changed, 342 insertions(+), 9 deletions(-) create mode 100644 examples/basic/prompt_template.py create mode 100644 src/agents/prompts.py create mode 100644 tests/test_agent_prompt.py diff --git a/examples/basic/prompt_template.py b/examples/basic/prompt_template.py new file mode 100644 index 000000000..59251935e --- /dev/null +++ b/examples/basic/prompt_template.py @@ -0,0 +1,79 @@ +import argparse +import asyncio +import random + +from agents import Agent, GenerateDynamicPromptData, Runner + +""" +NOTE: This example will not work out of the box, because the default prompt ID will not be available +in your project. + +To use it, please: +1. Go to https://platform.openai.com/playground/prompts +2. Create a new prompt variable, `poem_style`. +3. Create a system prompt with the content: +``` +Write a poem in {{poem_style}} +``` +4. Run the example with the `--prompt-id` flag. 
+""" + +DEFAULT_PROMPT_ID = "pmpt_6850729e8ba481939fd439e058c69ee004afaa19c520b78b" + + +class DynamicContext: + def __init__(self, prompt_id: str): + self.prompt_id = prompt_id + self.poem_style = random.choice(["limerick", "haiku", "ballad"]) + print(f"[debug] DynamicContext initialized with poem_style: {self.poem_style}") + + +async def _get_dynamic_prompt(data: GenerateDynamicPromptData): + ctx: DynamicContext = data.context.context + return { + "id": ctx.prompt_id, + "version": "1", + "variables": { + "poem_style": ctx.poem_style, + }, + } + + +async def dynamic_prompt(prompt_id: str): + context = DynamicContext(prompt_id) + + agent = Agent( + name="Assistant", + prompt=_get_dynamic_prompt, + ) + + result = await Runner.run(agent, "Tell me about recursion in programming.", context=context) + print(result.final_output) + + +async def static_prompt(prompt_id: str): + agent = Agent( + name="Assistant", + prompt={ + "id": prompt_id, + "version": "1", + "variables": { + "poem_style": "limerick", + }, + }, + ) + + result = await Runner.run(agent, "Tell me about recursion in programming.") + print(result.final_output) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--dynamic", action="store_true") + parser.add_argument("--prompt-id", type=str, default=DEFAULT_PROMPT_ID) + args = parser.parse_args() + + if args.dynamic: + asyncio.run(dynamic_prompt(args.prompt_id)) + else: + asyncio.run(static_prompt(args.prompt_id)) diff --git a/pyproject.toml b/pyproject.toml index a674dac96..e6d1d7746 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.9" license = "MIT" authors = [{ name = "OpenAI", email = "support@openai.com" }] dependencies = [ - "openai>=1.81.0", + "openai>=1.87.0", "pydantic>=2.10, <3", "griffe>=1.5.6, <2", "typing-extensions>=4.12.2, <5", diff --git a/src/agents/__init__.py b/src/agents/__init__.py index ee2f2aa16..a52347605 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -45,6 +45,7 @@ from .models.openai_chatcompletions import OpenAIChatCompletionsModel from .models.openai_provider import OpenAIProvider from .models.openai_responses import OpenAIResponsesModel +from .prompts import DynamicPromptFunction, GenerateDynamicPromptData, Prompt from .repl import run_demo_loop from .result import RunResult, RunResultStreaming from .run import RunConfig, Runner @@ -178,6 +179,9 @@ def enable_verbose_stdout_logging(): "AgentsException", "InputGuardrailTripwireTriggered", "OutputGuardrailTripwireTriggered", + "DynamicPromptFunction", + "GenerateDynamicPromptData", + "Prompt", "MaxTurnsExceeded", "ModelBehaviorError", "UserError", diff --git a/src/agents/agent.py b/src/agents/agent.py index 6adccedd5..61a9abe0c 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -7,6 +7,7 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, cast +from openai.types.responses.response_prompt_param import ResponsePromptParam from typing_extensions import NotRequired, TypeAlias, TypedDict from .agent_output import AgentOutputSchemaBase @@ -17,6 +18,7 @@ from .mcp import MCPUtil from .model_settings import ModelSettings from .models.interface import Model +from .prompts import DynamicPromptFunction, Prompt, PromptUtil from .run_context import RunContextWrapper, TContext from .tool import FunctionTool, FunctionToolResult, Tool, function_tool from .util import _transforms @@ -95,6 +97,12 @@ class Agent(Generic[TContext]): return a string. 
""" + prompt: Prompt | DynamicPromptFunction | None = None + """A prompt object (or a function that returns a Prompt). Prompts allow you to dynamically + configure the instructions, tools and other config for an agent outside of your code. Only + usable with OpenAI models, using the Responses API. + """ + handoff_description: str | None = None """A description of the agent. This is used when the agent is used as a handoff, so that an LLM knows what it does and when to invoke it. @@ -242,6 +250,12 @@ async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> s return None + async def get_prompt( + self, run_context: RunContextWrapper[TContext] + ) -> ResponsePromptParam | None: + """Get the prompt for the agent.""" + return await PromptUtil.to_model_input(self.prompt, run_context, self) + async def get_mcp_tools(self) -> list[Tool]: """Fetches the available tools from the MCP servers.""" convert_schemas_to_strict = self.mcp_config.get("convert_schemas_to_strict", False) diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py index 3ded80be2..c58a52dae 100644 --- a/src/agents/extensions/models/litellm_model.py +++ b/src/agents/extensions/models/litellm_model.py @@ -71,6 +71,7 @@ async def get_response( handoffs: list[Handoff], tracing: ModelTracing, previous_response_id: str | None, + prompt: Any | None = None, ) -> ModelResponse: with generation_span( model=str(self.model), @@ -88,6 +89,7 @@ async def get_response( span_generation, tracing, stream=False, + prompt=prompt, ) assert isinstance(response.choices[0], litellm.types.utils.Choices) @@ -153,8 +155,8 @@ async def stream_response( output_schema: AgentOutputSchemaBase | None, handoffs: list[Handoff], tracing: ModelTracing, - *, previous_response_id: str | None, + prompt: Any | None = None, ) -> AsyncIterator[TResponseStreamEvent]: with generation_span( model=str(self.model), @@ -172,6 +174,7 @@ async def stream_response( span_generation, tracing, stream=True, + prompt=prompt, ) final_response: Response | None = None @@ -202,6 +205,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: Literal[True], + prompt: Any | None = None, ) -> tuple[Response, AsyncStream[ChatCompletionChunk]]: ... @overload @@ -216,6 +220,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: Literal[False], + prompt: Any | None = None, ) -> litellm.types.utils.ModelResponse: ... async def _fetch_response( @@ -229,6 +234,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: bool = False, + prompt: Any | None = None, ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]: converted_messages = Converter.items_to_messages(input) diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py index 3a79e5640..5a185806c 100644 --- a/src/agents/models/interface.py +++ b/src/agents/models/interface.py @@ -5,6 +5,8 @@ from collections.abc import AsyncIterator from typing import TYPE_CHECKING +from openai.types.responses.response_prompt_param import ResponsePromptParam + from ..agent_output import AgentOutputSchemaBase from ..handoffs import Handoff from ..items import ModelResponse, TResponseInputItem, TResponseStreamEvent @@ -46,6 +48,7 @@ async def get_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: ResponsePromptParam | None, ) -> ModelResponse: """Get a response from the model. 
@@ -59,6 +62,7 @@ async def get_response( tracing: Tracing configuration. previous_response_id: the ID of the previous response. Generally not used by the model, except for the OpenAI Responses API. + prompt: The prompt config to use for the model. Returns: The full model response. @@ -77,6 +81,7 @@ def stream_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: ResponsePromptParam | None, ) -> AsyncIterator[TResponseStreamEvent]: """Stream a response from the model. @@ -90,6 +95,7 @@ def stream_response( tracing: Tracing configuration. previous_response_id: the ID of the previous response. Generally not used by the model, except for the OpenAI Responses API. + prompt: The prompt config to use for the model. Returns: An iterator of response stream events, in OpenAI Responses format. diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py index 4dad976bd..08803d8c0 100644 --- a/src/agents/models/openai_chatcompletions.py +++ b/src/agents/models/openai_chatcompletions.py @@ -9,6 +9,7 @@ from openai.types import ChatModel from openai.types.chat import ChatCompletion, ChatCompletionChunk from openai.types.responses import Response +from openai.types.responses.response_prompt_param import ResponsePromptParam from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails from .. import _debug @@ -53,6 +54,7 @@ async def get_response( handoffs: list[Handoff], tracing: ModelTracing, previous_response_id: str | None, + prompt: ResponsePromptParam | None = None, ) -> ModelResponse: with generation_span( model=str(self.model), @@ -69,6 +71,7 @@ async def get_response( span_generation, tracing, stream=False, + prompt=prompt, ) first_choice = response.choices[0] @@ -136,8 +139,8 @@ async def stream_response( output_schema: AgentOutputSchemaBase | None, handoffs: list[Handoff], tracing: ModelTracing, - *, previous_response_id: str | None, + prompt: ResponsePromptParam | None = None, ) -> AsyncIterator[TResponseStreamEvent]: """ Yields a partial message as it is generated, as well as the usage information. @@ -157,6 +160,7 @@ async def stream_response( span_generation, tracing, stream=True, + prompt=prompt, ) final_response: Response | None = None @@ -187,6 +191,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: Literal[True], + prompt: ResponsePromptParam | None = None, ) -> tuple[Response, AsyncStream[ChatCompletionChunk]]: ... @overload @@ -201,6 +206,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: Literal[False], + prompt: ResponsePromptParam | None = None, ) -> ChatCompletion: ... async def _fetch_response( @@ -214,6 +220,7 @@ async def _fetch_response( span: Span[GenerationSpanData], tracing: ModelTracing, stream: bool = False, + prompt: ResponsePromptParam | None = None, ) -> ChatCompletion | tuple[Response, AsyncStream[ChatCompletionChunk]]: converted_messages = Converter.items_to_messages(input) diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index fdc391c6a..961f690b0 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -17,6 +17,7 @@ WebSearchToolParam, response_create_params, ) +from openai.types.responses.response_prompt_param import ResponsePromptParam from .. 
import _debug from ..agent_output import AgentOutputSchemaBase @@ -74,6 +75,7 @@ async def get_response( handoffs: list[Handoff], tracing: ModelTracing, previous_response_id: str | None, + prompt: ResponsePromptParam | None = None, ) -> ModelResponse: with response_span(disabled=tracing.is_disabled()) as span_response: try: @@ -86,6 +88,7 @@ async def get_response( handoffs, previous_response_id, stream=False, + prompt=prompt, ) if _debug.DONT_LOG_MODEL_DATA: @@ -141,6 +144,7 @@ async def stream_response( handoffs: list[Handoff], tracing: ModelTracing, previous_response_id: str | None, + prompt: ResponsePromptParam | None = None, ) -> AsyncIterator[ResponseStreamEvent]: """ Yields a partial message as it is generated, as well as the usage information. @@ -156,6 +160,7 @@ async def stream_response( handoffs, previous_response_id, stream=True, + prompt=prompt, ) final_response: Response | None = None @@ -192,6 +197,7 @@ async def _fetch_response( handoffs: list[Handoff], previous_response_id: str | None, stream: Literal[True], + prompt: ResponsePromptParam | None = None, ) -> AsyncStream[ResponseStreamEvent]: ... @overload @@ -205,6 +211,7 @@ async def _fetch_response( handoffs: list[Handoff], previous_response_id: str | None, stream: Literal[False], + prompt: ResponsePromptParam | None = None, ) -> Response: ... async def _fetch_response( @@ -217,6 +224,7 @@ async def _fetch_response( handoffs: list[Handoff], previous_response_id: str | None, stream: Literal[True] | Literal[False] = False, + prompt: ResponsePromptParam | None = None, ) -> Response | AsyncStream[ResponseStreamEvent]: list_input = ItemHelpers.input_to_new_input_list(input) @@ -252,6 +260,7 @@ async def _fetch_response( input=list_input, include=converted_tools.includes, tools=converted_tools.tools, + prompt=self._non_null_or_not_given(prompt), temperature=self._non_null_or_not_given(model_settings.temperature), top_p=self._non_null_or_not_given(model_settings.top_p), truncation=self._non_null_or_not_given(model_settings.truncation), diff --git a/src/agents/prompts.py b/src/agents/prompts.py new file mode 100644 index 000000000..aa627d033 --- /dev/null +++ b/src/agents/prompts.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import inspect +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Callable + +from openai.types.responses.response_prompt_param import ( + ResponsePromptParam, + Variables as ResponsesPromptVariables, +) +from typing_extensions import NotRequired, TypedDict + +from agents.util._types import MaybeAwaitable + +from .exceptions import UserError +from .run_context import RunContextWrapper + +if TYPE_CHECKING: + from .agent import Agent + + +class Prompt(TypedDict): + """Prompt configuration to use for interacting with an OpenAI model.""" + + id: str + """The unique ID of the prompt.""" + + version: NotRequired[str] + """Optional version of the prompt.""" + + variables: NotRequired[dict[str, ResponsesPromptVariables]] + """Optional variables to substitute into the prompt.""" + + +@dataclass +class GenerateDynamicPromptData: + """Inputs to a function that allows you to dynamically generate a prompt.""" + + context: RunContextWrapper[Any] + """The run context.""" + + agent: Agent[Any] + """The agent for which the prompt is being generated.""" + + +DynamicPromptFunction = Callable[[GenerateDynamicPromptData], MaybeAwaitable[Prompt]] +"""A function that dynamically generates a prompt.""" + + +class PromptUtil: + @staticmethod + async def to_model_input( + prompt: Prompt | 
DynamicPromptFunction | None, + context: RunContextWrapper[Any], + agent: Agent[Any], + ) -> ResponsePromptParam | None: + if prompt is None: + return None + + resolved_prompt: Prompt + if isinstance(prompt, dict): + resolved_prompt = prompt + else: + func_result = prompt(GenerateDynamicPromptData(context=context, agent=agent)) + if inspect.isawaitable(func_result): + resolved_prompt = await func_result + else: + resolved_prompt = func_result + if not isinstance(resolved_prompt, dict): + raise UserError("Dynamic prompt function must return a Prompt") + + return { + "id": resolved_prompt["id"], + "version": resolved_prompt.get("version"), + "variables": resolved_prompt.get("variables"), + } diff --git a/src/agents/run.py b/src/agents/run.py index f375a0b89..10148a323 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -6,6 +6,9 @@ from typing import Any, cast from openai.types.responses import ResponseCompletedEvent +from openai.types.responses.response_prompt_param import ( + ResponsePromptParam, +) from ._run_impl import ( AgentToolUseTracker, @@ -682,7 +685,10 @@ async def _run_single_turn_streamed( streamed_result.current_agent = agent streamed_result._current_agent_output_schema = output_schema - system_prompt = await agent.get_system_prompt(context_wrapper) + system_prompt, prompt_config = await asyncio.gather( + agent.get_system_prompt(context_wrapper), + agent.get_prompt(context_wrapper), + ) handoffs = cls._get_handoffs(agent) model = cls._get_model(agent, run_config) @@ -706,6 +712,7 @@ async def _run_single_turn_streamed( run_config.tracing_disabled, run_config.trace_include_sensitive_data ), previous_response_id=previous_response_id, + prompt=prompt_config, ): if isinstance(event, ResponseCompletedEvent): usage = ( @@ -777,7 +784,10 @@ async def _run_single_turn( ), ) - system_prompt = await agent.get_system_prompt(context_wrapper) + system_prompt, prompt_config = await asyncio.gather( + agent.get_system_prompt(context_wrapper), + agent.get_prompt(context_wrapper), + ) output_schema = cls._get_output_schema(agent) handoffs = cls._get_handoffs(agent) @@ -795,6 +805,7 @@ async def _run_single_turn( run_config, tool_use_tracker, previous_response_id, + prompt_config, ) return await cls._get_single_step_result_from_response( @@ -938,6 +949,7 @@ async def _get_new_response( run_config: RunConfig, tool_use_tracker: AgentToolUseTracker, previous_response_id: str | None, + prompt_config: ResponsePromptParam | None, ) -> ModelResponse: model = cls._get_model(agent, run_config) model_settings = agent.model_settings.resolve(run_config.model_settings) @@ -954,6 +966,7 @@ async def _get_new_response( run_config.tracing_disabled, run_config.trace_include_sensitive_data ), previous_response_id=previous_response_id, + prompt=prompt_config, ) context_wrapper.usage.add(new_response.usage) diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py index 17b595f06..c4329b8af 100644 --- a/src/agents/tool_context.py +++ b/src/agents/tool_context.py @@ -7,6 +7,7 @@ def _assert_must_pass_tool_call_id() -> str: raise ValueError("tool_call_id must be passed to ToolContext") + @dataclass class ToolContext(RunContextWrapper[TContext]): """The context of a tool call.""" diff --git a/tests/fake_model.py b/tests/fake_model.py index 9f0c83a2f..6c1377e6d 100644 --- a/tests/fake_model.py +++ b/tests/fake_model.py @@ -61,6 +61,7 @@ async def get_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: Any | None, ) -> ModelResponse: self.last_turn_args = { 
"system_instructions": system_instructions, @@ -103,6 +104,7 @@ async def stream_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: Any | None, ) -> AsyncIterator[TResponseStreamEvent]: self.last_turn_args = { "system_instructions": system_instructions, diff --git a/tests/models/test_litellm_chatcompletions_stream.py b/tests/models/test_litellm_chatcompletions_stream.py index 06e46b39c..cd342e444 100644 --- a/tests/models/test_litellm_chatcompletions_stream.py +++ b/tests/models/test_litellm_chatcompletions_stream.py @@ -90,6 +90,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # We expect a response.created, then a response.output_item.added, content part added, @@ -182,6 +183,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # Expect sequence similar to text: created, output_item.added, content part added, @@ -270,6 +272,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # Sequence should be: response.created, then after loop we expect function call-related events: diff --git a/tests/test_agent_prompt.py b/tests/test_agent_prompt.py new file mode 100644 index 000000000..010717d66 --- /dev/null +++ b/tests/test_agent_prompt.py @@ -0,0 +1,97 @@ +import pytest + +from agents import Agent, Prompt, RunContextWrapper, Runner + +from .fake_model import FakeModel +from .test_responses import get_text_message + + +class PromptCaptureFakeModel(FakeModel): + """Subclass of FakeModel that records the prompt passed to the model.""" + + def __init__(self): + super().__init__() + self.last_prompt = None + + async def get_response( + self, + system_instructions, + input, + model_settings, + tools, + output_schema, + handoffs, + tracing, + *, + previous_response_id, + prompt, + ): + # Record the prompt that the agent resolved and passed in. 
+ self.last_prompt = prompt + return await super().get_response( + system_instructions, + input, + model_settings, + tools, + output_schema, + handoffs, + tracing, + previous_response_id=previous_response_id, + prompt=prompt, + ) + + +@pytest.mark.asyncio +async def test_static_prompt_is_resolved_correctly(): + static_prompt: Prompt = { + "id": "my_prompt", + "version": "1", + "variables": {"some_var": "some_value"}, + } + + agent = Agent(name="test", prompt=static_prompt) + context_wrapper = RunContextWrapper(context=None) + + resolved = await agent.get_prompt(context_wrapper) + + assert resolved == { + "id": "my_prompt", + "version": "1", + "variables": {"some_var": "some_value"}, + } + + +@pytest.mark.asyncio +async def test_dynamic_prompt_is_resolved_correctly(): + dynamic_prompt_value: Prompt = {"id": "dyn_prompt", "version": "2"} + + def dynamic_prompt_fn(_data): + return dynamic_prompt_value + + agent = Agent(name="test", prompt=dynamic_prompt_fn) + context_wrapper = RunContextWrapper(context=None) + + resolved = await agent.get_prompt(context_wrapper) + + assert resolved == {"id": "dyn_prompt", "version": "2", "variables": None} + + +@pytest.mark.asyncio +async def test_prompt_is_passed_to_model(): + static_prompt: Prompt = {"id": "model_prompt"} + + model = PromptCaptureFakeModel() + agent = Agent(name="test", model=model, prompt=static_prompt) + + # Ensure the model returns a simple message so the run completes in one turn. + model.set_next_output([get_text_message("done")]) + + await Runner.run(agent, input="hello") + + # The model should have received the prompt resolved by the agent. + expected_prompt = { + "id": "model_prompt", + "version": None, + "variables": None, + } + assert model.last_prompt == expected_prompt diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py index 9a85dcb7b..a6909b195 100644 --- a/tests/test_openai_chatcompletions.py +++ b/tests/test_openai_chatcompletions.py @@ -77,6 +77,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ) # Should have produced exactly one output message with one text part assert isinstance(resp, ModelResponse) @@ -128,6 +129,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ) assert len(resp.output) == 1 assert isinstance(resp.output[0], ResponseOutputMessage) @@ -180,6 +182,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ) # Expect a message item followed by a function tool call item. 
assert len(resp.output) == 2 @@ -221,6 +224,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ) assert resp.output == [] diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py index 5c8bb9e3a..49e7bc2f4 100644 --- a/tests/test_openai_chatcompletions_stream.py +++ b/tests/test_openai_chatcompletions_stream.py @@ -90,6 +90,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # We expect a response.created, then a response.output_item.added, content part added, @@ -182,6 +183,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # Expect sequence similar to text: created, output_item.added, content part added, @@ -270,6 +272,7 @@ async def patched_fetch_response(self, *args, **kwargs): handoffs=[], tracing=ModelTracing.DISABLED, previous_response_id=None, + prompt=None, ): output_events.append(event) # Sequence should be: response.created, then after loop we expect function call-related events: diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py index db24fe496..fe63e8ecb 100644 --- a/tests/test_responses_tracing.py +++ b/tests/test_responses_tracing.py @@ -71,6 +71,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): return DummyResponse() @@ -115,6 +116,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): return DummyResponse() @@ -157,6 +159,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): return DummyResponse() @@ -196,6 +199,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): class DummyStream: async def __aiter__(self): @@ -249,6 +253,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): class DummyStream: async def __aiter__(self): @@ -301,6 +306,7 @@ async def dummy_fetch_response( handoffs, prev_response_id, stream, + prompt, ): class DummyStream: async def __aiter__(self): diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py index 035a05d56..cfbb95011 100644 --- a/tests/voice/test_workflow.py +++ b/tests/voice/test_workflow.py @@ -2,6 +2,7 @@ import json from collections.abc import AsyncIterator +from typing import Any import pytest from inline_snapshot import snapshot @@ -53,6 +54,7 @@ async def get_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: Any | None, ) -> ModelResponse: raise NotImplementedError("Not implemented") @@ -67,6 +69,7 @@ async def stream_response( tracing: ModelTracing, *, previous_response_id: str | None, + prompt: Any | None, ) -> AsyncIterator[TResponseStreamEvent]: output = self.get_next_output() for item in output: diff --git a/uv.lock b/uv.lock index 56f56f413..cd26d45f6 100644 --- a/uv.lock +++ b/uv.lock @@ -1461,7 +1461,7 @@ wheels = [ [[package]] name = "openai" -version = "1.81.0" +version = "1.87.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1473,9 +1473,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/89/a1e4f3fa7ca4f7fec90dbf47d93b7cd5ff65924926733af15044e302a192/openai-1.81.0.tar.gz", 
hash = "sha256:349567a8607e0bcffd28e02f96b5c2397d0d25d06732d90ab3ecbf97abf030f9", size = 456861 } +sdist = { url = "https://files.pythonhosted.org/packages/47/ed/2b3f6c7e950784e9442115ab8ebeff514d543fb33da10607b39364645a75/openai-1.87.0.tar.gz", hash = "sha256:5c69764171e0db9ef993e7a4d8a01fd8ff1026b66f8bdd005b9461782b6e7dfc", size = 470880 } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/66/bcc7f9bf48e8610a33e3b5c96a5a644dad032d92404ea2a5e8b43ba067e8/openai-1.81.0-py3-none-any.whl", hash = "sha256:1c71572e22b43876c5d7d65ade0b7b516bb527c3d44ae94111267a09125f7bae", size = 717529 }, + { url = "https://files.pythonhosted.org/packages/36/ac/313ded47ce1d5bc2ec02ed5dd5506bf5718678a4655ac20f337231d9aae3/openai-1.87.0-py3-none-any.whl", hash = "sha256:f9bcae02ac4fff6522276eee85d33047335cfb692b863bd8261353ce4ada5692", size = 734368 }, ] [[package]] @@ -1536,7 +1536,7 @@ requires-dist = [ { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.67.4.post1,<2" }, { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.9.4,<2" }, { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" }, - { name = "openai", specifier = ">=1.81.0" }, + { name = "openai", specifier = ">=1.87.0" }, { name = "pydantic", specifier = ">=2.10,<3" }, { name = "requests", specifier = ">=2.0,<3" }, { name = "types-requests", specifier = ">=2.0,<3" }, From 901d2ac57c21b5d4fc56fc5d083a8e6c473b7363 Mon Sep 17 00:00:00 2001 From: Rohan Mehta Date: Mon, 16 Jun 2025 15:49:36 -0400 Subject: [PATCH 11/11] v0.0.18 (#878) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e6d1d7746..6cdf337ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai-agents" -version = "0.0.17" +version = "0.0.18" description = "OpenAI Agents SDK" readme = "README.md" requires-python = ">=3.9"