From 76b752c9485ce3adf2966ae906da03913bca1c32 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Mon, 2 Dec 2024 20:45:48 +0000 Subject: [PATCH 01/18] chore(internal): bump pyright (#769) --- requirements-dev.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 69698d3f..f09dc70f 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -91,7 +91,7 @@ pydantic-core==2.23.4 # via pydantic pygments==2.18.0 # via rich -pyright==1.1.380 +pyright==1.1.389 pytest==8.3.3 # via pytest-asyncio pytest-asyncio==0.24.0 From 8a1a3f04d55b60f2c9524cbc844a0abb02319279 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 02:15:40 +0000 Subject: [PATCH 02/18] chore: make the `Omit` type public (#772) --- src/anthropic/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/anthropic/__init__.py b/src/anthropic/__init__.py index bf3fef38..8cba2f09 100644 --- a/src/anthropic/__init__.py +++ b/src/anthropic/__init__.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from . import types -from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes +from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path from ._client import ( Client, @@ -52,6 +52,7 @@ "ProxiesTypes", "NotGiven", "NOT_GIVEN", + "Omit", "AnthropicError", "APIError", "APIStatusError", From ed9ba8f6d34d5d0ff5b3aca30311f0708d82f409 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:41:38 +0000 Subject: [PATCH 03/18] chore(internal): bump pydantic dependency (#775) --- requirements-dev.lock | 5 +++-- requirements.lock | 4 ++-- src/anthropic/_types.py | 6 ++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index f09dc70f..11da837e 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -85,9 +85,9 @@ pyasn1==0.6.1 # via rsa pyasn1-modules==0.4.1 # via google-auth -pydantic==2.9.2 +pydantic==2.10.3 # via anthropic -pydantic-core==2.23.4 +pydantic-core==2.27.1 # via pydantic pygments==2.18.0 # via rich @@ -130,6 +130,7 @@ typing-extensions==4.12.2 # via mypy # via pydantic # via pydantic-core + # via pyright urllib3==1.26.20 # via botocore virtualenv==20.24.5 diff --git a/requirements.lock b/requirements.lock index f8ed8f88..710c6e61 100644 --- a/requirements.lock +++ b/requirements.lock @@ -50,9 +50,9 @@ pyasn1==0.6.1 # via rsa pyasn1-modules==0.4.1 # via google-auth -pydantic==2.9.2 +pydantic==2.10.3 # via anthropic -pydantic-core==2.23.4 +pydantic-core==2.27.1 # via pydantic python-dateutil==2.9.0.post0 # via botocore diff --git a/src/anthropic/_types.py b/src/anthropic/_types.py index fd6f305a..d80c2081 100644 --- a/src/anthropic/_types.py +++ b/src/anthropic/_types.py @@ -194,10 +194,8 @@ def get(self, __key: str) -> str | None: ... 
StrBytesIntFloat = Union[str, bytes, int, float] # Note: copied from Pydantic -# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 -IncEx: TypeAlias = Union[ - Set[int], Set[str], Mapping[int, Union["IncEx", Literal[True]]], Mapping[str, Union["IncEx", Literal[True]]] -] +# https://github.com/pydantic/pydantic/blob/6f31f8f68ef011f84357330186f603ff295312fd/pydantic/main.py#L79 +IncEx: TypeAlias = Union[Set[int], Set[str], Mapping[int, Union["IncEx", bool]], Mapping[str, Union["IncEx", bool]]] PostParser = Callable[[Any], Any] From fbcf6edef87a50529d62d71bf4ecb47732cae311 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:42:10 +0000 Subject: [PATCH 04/18] chore: remove deprecated HTTP client options (#777) If you were using these options, you should now pass in a custom `httpx` client instance instead https://github.com/anthropics/anthropic-sdk-python#configuring-the-http-client --- src/anthropic/_client.py | 64 +------------ tests/test_client.py | 188 --------------------------------------- 2 files changed, 2 insertions(+), 250 deletions(-) diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py index 63b2533b..8885e519 100644 --- a/src/anthropic/_client.py +++ b/src/anthropic/_client.py @@ -18,7 +18,6 @@ NotGiven, Transport, ProxiesTypes, - AsyncTransport, RequestOptions, ) from ._utils import ( @@ -30,11 +29,8 @@ from ._exceptions import APIStatusError from ._base_client import ( DEFAULT_MAX_RETRIES, - DEFAULT_CONNECTION_LIMITS, SyncAPIClient, AsyncAPIClient, - SyncHttpxClientWrapper, - AsyncHttpxClientWrapper, ) __all__ = [ @@ -79,12 +75,6 @@ def __init__( # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. http_client: httpx.Client | None = None, - # See httpx documentation for [custom transports](https://www.python-httpx.org/advanced/#custom-transports) - transport: Transport | None = None, - # See httpx documentation for [proxies](https://www.python-httpx.org/advanced/#http-proxying) - proxies: ProxiesTypes | None = None, - # See httpx documentation for [limits](https://www.python-httpx.org/advanced/#pool-limit-configuration) - connection_pool_limits: httpx.Limits | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised # if the API responds with invalid data for the expected schema. 
@@ -120,9 +110,6 @@ def __init__( max_retries=max_retries, timeout=timeout, http_client=http_client, - transport=transport, - proxies=proxies, - limits=connection_pool_limits, custom_headers=default_headers, custom_query=default_query, _strict_response_validation=_strict_response_validation, @@ -198,7 +185,6 @@ def copy( base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, - connection_pool_limits: httpx.Limits | None = None, max_retries: int | NotGiven = NOT_GIVEN, default_headers: Mapping[str, str] | None = None, set_default_headers: Mapping[str, str] | None = None, @@ -227,31 +213,13 @@ def copy( elif set_default_query is not None: params = set_default_query - if connection_pool_limits is not None: - if http_client is not None: - raise ValueError("The 'http_client' argument is mutually exclusive with 'connection_pool_limits'") - - if not isinstance(self._client, SyncHttpxClientWrapper): - raise ValueError( - "A custom HTTP client has been set and is mutually exclusive with the 'connection_pool_limits' argument" - ) - - http_client = None - else: - if self._limits is not DEFAULT_CONNECTION_LIMITS: - connection_pool_limits = self._limits - else: - connection_pool_limits = None - - http_client = http_client or self._client - + http_client = http_client or self._client return self.__class__( api_key=api_key or self.api_key, auth_token=auth_token or self.auth_token, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, - connection_pool_limits=connection_pool_limits, max_retries=max_retries if is_given(max_retries) else self.max_retries, default_headers=headers, default_query=params, @@ -325,12 +293,6 @@ def __init__( # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. http_client: httpx.AsyncClient | None = None, - # See httpx documentation for [custom transports](https://www.python-httpx.org/advanced/#custom-transports) - transport: AsyncTransport | None = None, - # See httpx documentation for [proxies](https://www.python-httpx.org/advanced/#http-proxying) - proxies: ProxiesTypes | None = None, - # See httpx documentation for [limits](https://www.python-httpx.org/advanced/#pool-limit-configuration) - connection_pool_limits: httpx.Limits | None = None, # Enable or disable schema validation for data returned by the API. # When enabled an error APIResponseValidationError is raised # if the API responds with invalid data for the expected schema. 
@@ -366,9 +328,6 @@ def __init__( max_retries=max_retries, timeout=timeout, http_client=http_client, - transport=transport, - proxies=proxies, - limits=connection_pool_limits, custom_headers=default_headers, custom_query=default_query, _strict_response_validation=_strict_response_validation, @@ -444,7 +403,6 @@ def copy( base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, - connection_pool_limits: httpx.Limits | None = None, max_retries: int | NotGiven = NOT_GIVEN, default_headers: Mapping[str, str] | None = None, set_default_headers: Mapping[str, str] | None = None, @@ -473,31 +431,13 @@ def copy( elif set_default_query is not None: params = set_default_query - if connection_pool_limits is not None: - if http_client is not None: - raise ValueError("The 'http_client' argument is mutually exclusive with 'connection_pool_limits'") - - if not isinstance(self._client, AsyncHttpxClientWrapper): - raise ValueError( - "A custom HTTP client has been set and is mutually exclusive with the 'connection_pool_limits' argument" - ) - - http_client = None - else: - if self._limits is not DEFAULT_CONNECTION_LIMITS: - connection_pool_limits = self._limits - else: - connection_pool_limits = None - - http_client = http_client or self._client - + http_client = http_client or self._client return self.__class__( api_key=api_key or self.api_key, auth_token=auth_token or self.auth_token, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, - connection_pool_limits=connection_pool_limits, max_retries=max_retries if is_given(max_retries) else self.max_retries, default_headers=headers, default_query=params, diff --git a/tests/test_client.py b/tests/test_client.py index 36825b9f..f50233f0 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -634,99 +634,6 @@ def test_absolute_request_url(self, client: Anthropic) -> None: ) assert request.url == "https://myapi.com/foo" - def test_transport_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `transport` argument is deprecated. The `http_client` argument should be passed instead", - ): - transport = httpx.MockTransport( - lambda: None, # type: ignore - ) - - client = Anthropic( - base_url=base_url, api_key=api_key, _strict_response_validation=True, transport=transport - ) - - assert client._client._transport is transport - - def test_transport_option_mutually_exclusive_with_http_client(self) -> None: - with httpx.Client() as http_client: - with pytest.raises(ValueError, match="The `http_client` argument is mutually exclusive with `transport`"): - with pytest.warns(DeprecationWarning): - Anthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - transport=httpx.MockTransport( - lambda: None, # type: ignore - ), - http_client=http_client, - ) - - def test_connection_pool_limits_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `connection_pool_limits` argument is deprecated. 
The `http_client` argument should be passed instead", - ): - connection_pool_limits = httpx.Limits( - max_connections=101, max_keepalive_connections=76, keepalive_expiry=23 - ) - - client = Anthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - connection_pool_limits=connection_pool_limits, - ) - - assert isinstance(client._client._transport, httpx.HTTPTransport) - assert client._client._transport._pool._max_connections == 101 - assert client._client._transport._pool._max_keepalive_connections == 76 - assert client._client._transport._pool._keepalive_expiry == 23 - - def test_connection_pool_limits_option_mutually_exclusive_with_http_client(self) -> None: - with httpx.Client() as http_client: - with pytest.raises( - ValueError, match="The `http_client` argument is mutually exclusive with `connection_pool_limits`" - ): - with pytest.warns(DeprecationWarning): - Anthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - connection_pool_limits=httpx.Limits( - max_connections=101, max_keepalive_connections=76, keepalive_expiry=23 - ), - http_client=http_client, - ) - - def test_proxies_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - ): - proxies = "https://www.example.com/proxy" - - client = Anthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True, proxies=proxies) - - mounts = list(client._client._mounts.keys()) - assert len(mounts) == 1 - - pattern = mounts[0].pattern - assert pattern == "all://" - - def test_proxies_option_mutually_exclusive_with_http_client(self) -> None: - with httpx.Client() as http_client: - with pytest.raises(ValueError, match="The `http_client` argument is mutually exclusive with `proxies`"): - with pytest.warns(DeprecationWarning): - Anthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - proxies="https://www.example.com/proxy", - http_client=http_client, - ) - def test_copied_client_does_not_close_http(self) -> None: client = Anthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() @@ -1600,101 +1507,6 @@ def test_absolute_request_url(self, client: AsyncAnthropic) -> None: ) assert request.url == "https://myapi.com/foo" - def test_transport_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `transport` argument is deprecated. The `http_client` argument should be passed instead", - ): - transport = httpx.MockTransport( - lambda: None, # type: ignore - ) - - client = AsyncAnthropic( - base_url=base_url, api_key=api_key, _strict_response_validation=True, transport=transport - ) - - assert client._client._transport is transport - - async def test_transport_option_mutually_exclusive_with_http_client(self) -> None: - async with httpx.AsyncClient() as http_client: - with pytest.raises(ValueError, match="The `http_client` argument is mutually exclusive with `transport`"): - with pytest.warns(DeprecationWarning): - AsyncAnthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - transport=httpx.MockTransport( - lambda: None, # type: ignore - ), - http_client=http_client, - ) - - def test_connection_pool_limits_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `connection_pool_limits` argument is deprecated. 
The `http_client` argument should be passed instead", - ): - connection_pool_limits = httpx.Limits( - max_connections=101, max_keepalive_connections=76, keepalive_expiry=23 - ) - - client = AsyncAnthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - connection_pool_limits=connection_pool_limits, - ) - - assert isinstance(client._client._transport, httpx.AsyncHTTPTransport) - assert client._client._transport._pool._max_connections == 101 - assert client._client._transport._pool._max_keepalive_connections == 76 - assert client._client._transport._pool._keepalive_expiry == 23 - - async def test_connection_pool_limits_option_mutually_exclusive_with_http_client(self) -> None: - async with httpx.AsyncClient() as http_client: - with pytest.raises( - ValueError, match="The `http_client` argument is mutually exclusive with `connection_pool_limits`" - ): - with pytest.warns(DeprecationWarning): - AsyncAnthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - connection_pool_limits=httpx.Limits( - max_connections=101, max_keepalive_connections=76, keepalive_expiry=23 - ), - http_client=http_client, - ) - - def test_proxies_option_is_deprecated(self) -> None: - with pytest.warns( - DeprecationWarning, - match="The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - ): - proxies = "https://www.example.com/proxy" - - client = AsyncAnthropic( - base_url=base_url, api_key=api_key, _strict_response_validation=True, proxies=proxies - ) - - mounts = list(client._client._mounts.keys()) - assert len(mounts) == 1 - - pattern = mounts[0].pattern - assert pattern == "all://" - - async def test_proxies_option_mutually_exclusive_with_http_client(self) -> None: - async with httpx.AsyncClient() as http_client: - with pytest.raises(ValueError, match="The `http_client` argument is mutually exclusive with `proxies`"): - with pytest.warns(DeprecationWarning): - AsyncAnthropic( - base_url=base_url, - api_key=api_key, - _strict_response_validation=True, - proxies="https://www.example.com/proxy", - http_client=http_client, - ) - async def test_copied_client_does_not_close_http(self) -> None: client = AsyncAnthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() From e0e3667f8662d3a4094078e9fe84f11a34a2f7a9 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 18:17:37 +0000 Subject: [PATCH 05/18] docs(readme): fix http client proxies example (#778) --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f006d1d0..cd376246 100644 --- a/README.md +++ b/README.md @@ -654,18 +654,19 @@ can also get all the extra fields on the Pydantic model as a dict with You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for proxies -- Custom transports +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) - Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python +import httpx from anthropic import Anthropic, DefaultHttpxClient client = Anthropic( # Or use the `ANTHROPIC_BASE_URL` env var base_url="http://my.test.server.example.com:8083", http_client=DefaultHttpxClient( - proxies="http://my.test.proxy.example.com", + 
proxy="http://my.test.proxy.example.com", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) From 0708f9a3a3276c524bdd3ee25099b02d3a83b64f Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:31:43 +0000 Subject: [PATCH 06/18] docs: use latest sonnet in example snippets (#781) --- README.md | 30 +++++++++++++++--------------- examples/images.py | 2 +- examples/messages.py | 4 ++-- examples/messages_stream.py | 2 +- examples/tools.py | 4 ++-- examples/tools_stream.py | 2 +- helpers.md | 4 ++-- tests/test_client.py | 8 ++++---- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index cd376246..5a61b0c1 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ message = client.messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(message.content) ``` @@ -70,7 +70,7 @@ async def main() -> None: "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(message.content) @@ -97,7 +97,7 @@ stream = client.messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", stream=True, ) for event in stream: @@ -119,7 +119,7 @@ stream = await client.messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", stream=True, ) async for event in stream: @@ -145,7 +145,7 @@ async def main() -> None: "content": "Say hello there!", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) as stream: async for text in stream.text_stream: print(text, end="", flush=True) @@ -198,7 +198,7 @@ await client.beta.messages.batches.create( { "custom_id": "my-first-request", "params": { - "model": "claude-3-5-sonnet-20240620", + "model": "claude-3-5-sonnet-latest", "max_tokens": 1024, "messages": [{"role": "user", "content": "Hello, world"}], }, @@ -206,7 +206,7 @@ await client.beta.messages.batches.create( { "custom_id": "my-second-request", "params": { - "model": "claude-3-5-sonnet-20240620", + "model": "claude-3-5-sonnet-latest", "max_tokens": 1024, "messages": [{"role": "user", "content": "Hi again, friend"}], }, @@ -250,7 +250,7 @@ message = client.messages.create( "content": "Hello!", } ], - model="anthropic.claude-3-sonnet-20240229-v1:0", + model="anthropic.claude-3-5-sonnet-20241022-v2:0", ) print(message) ``` @@ -281,7 +281,7 @@ from anthropic import AnthropicVertex client = AnthropicVertex() message = client.messages.create( - model="claude-3-sonnet@20240229", + model="claude-3-5-sonnet-v2@20241022", max_tokens=100, messages=[ { @@ -399,7 +399,7 @@ try: "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) except anthropic.APIConnectionError as e: print("The server could not be reached") @@ -440,7 +440,7 @@ message = client.messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(message._request_id) # req_018EeWyXxfu5pfWkrYcMdjWG ``` @@ -475,7 +475,7 @@ client.with_options(max_retries=5).messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) ``` @@ -507,7 +507,7 @@ client.with_options(timeout=5.0).messages.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) ``` @@ -571,7 +571,7 @@ 
response = client.messages.with_raw_response.create( "role": "user", "content": "Hello, Claude", }], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(response.headers.get('X-My-Header')) @@ -605,7 +605,7 @@ with client.messages.with_streaming_response.create( "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) as response: print(response.headers.get("X-My-Header")) diff --git a/examples/images.py b/examples/images.py index 0da834bc..0ebf2eb5 100644 --- a/examples/images.py +++ b/examples/images.py @@ -25,6 +25,6 @@ ], }, ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(response.model_dump_json(indent=2)) diff --git a/examples/messages.py b/examples/messages.py index f2e7e3c2..55151dd0 100644 --- a/examples/messages.py +++ b/examples/messages.py @@ -10,7 +10,7 @@ "content": "Hello!", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(response) @@ -30,6 +30,6 @@ "content": "How are you?", }, ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) print(response2) diff --git a/examples/messages_stream.py b/examples/messages_stream.py index be69a2c1..a7288aab 100644 --- a/examples/messages_stream.py +++ b/examples/messages_stream.py @@ -14,7 +14,7 @@ async def main() -> None: "content": "Say hello there!", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) as stream: async for event in stream: if event.type == "text": diff --git a/examples/tools.py b/examples/tools.py index c0a8ea60..b6978757 100644 --- a/examples/tools.py +++ b/examples/tools.py @@ -21,7 +21,7 @@ ] message = client.messages.create( - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", max_tokens=1024, messages=[user_message], tools=tools, @@ -32,7 +32,7 @@ tool = next(c for c in message.content if c.type == "tool_use") response = client.messages.create( - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", max_tokens=1024, messages=[ user_message, diff --git a/examples/tools_stream.py b/examples/tools_stream.py index 0fe9cfc1..4d712508 100644 --- a/examples/tools_stream.py +++ b/examples/tools_stream.py @@ -8,7 +8,7 @@ async def main() -> None: async with client.messages.stream( max_tokens=1024, - model="claude-3-haiku-20240307", + model="claude-3-5-sonnet-latest", tools=[ { "name": "get_weather", diff --git a/helpers.md b/helpers.md index 55c249a2..65eac0bf 100644 --- a/helpers.md +++ b/helpers.md @@ -11,7 +11,7 @@ async with client.messages.stream( "content": "Say hello there!", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) as stream: async for text in stream.text_stream: print(text, end="", flush=True) @@ -60,7 +60,7 @@ async with client.messages.stream( "content": "Say hello there!", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ) as stream: async for event in stream: if event.type == "text": diff --git a/tests/test_client.py b/tests/test_client.py index f50233f0..ca181ada 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -745,7 +745,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ), ), cast_to=httpx.Response, @@ -772,7 +772,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + 
model="claude-3-5-sonnet-latest", ), ), cast_to=httpx.Response, @@ -1625,7 +1625,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ), ), cast_to=httpx.Response, @@ -1652,7 +1652,7 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) "content": "Hello, Claude", } ], - model="claude-3-opus-20240229", + model="claude-3-5-sonnet-latest", ), ), cast_to=httpx.Response, From 2b0c0391bcfa5558742c503dac2934c1e5604421 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 12:13:37 +0000 Subject: [PATCH 07/18] chore(internal): bump pyright (#785) --- requirements-dev.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 11da837e..ce790734 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -91,7 +91,7 @@ pydantic-core==2.27.1 # via pydantic pygments==2.18.0 # via rich -pyright==1.1.389 +pyright==1.1.390 pytest==8.3.3 # via pytest-asyncio pytest-asyncio==0.24.0 From 5c632eaa0e3c80fa46a963906a2306a0c319c11e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 12:45:34 +0000 Subject: [PATCH 08/18] chore(internal): add support for TypeAliasType (#786) --- pyproject.toml | 2 +- src/anthropic/_legacy_response.py | 18 ++++++++++-------- src/anthropic/_models.py | 3 +++ src/anthropic/_response.py | 18 ++++++++++-------- src/anthropic/_utils/__init__.py | 1 + src/anthropic/_utils/_typing.py | 31 ++++++++++++++++++++++++++++++- tests/test_models.py | 18 +++++++++++++++++- tests/utils.py | 4 ++++ 8 files changed, 76 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e411b281..a2eba320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ dependencies = [ "httpx>=0.23.0, <1", "pydantic>=1.9.0, <3", - "typing-extensions>=4.7, <5", + "typing-extensions>=4.10, <5", "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", diff --git a/src/anthropic/_legacy_response.py b/src/anthropic/_legacy_response.py index 9bd9570a..f336c6f7 100644 --- a/src/anthropic/_legacy_response.py +++ b/src/anthropic/_legacy_response.py @@ -24,7 +24,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type from ._models import BaseModel, is_basemodel, add_request_id from ._constants import RAW_RESPONSE_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -197,9 +197,15 @@ def elapsed(self) -> datetime.timedelta: return self.http_response.elapsed def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) cast_to = to if to is not None else self._cast_to origin = get_origin(cast_to) or cast_to @@ -259,16 +265,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + 
cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) diff --git a/src/anthropic/_models.py b/src/anthropic/_models.py index ad846d0c..35705749 100644 --- a/src/anthropic/_models.py +++ b/src/anthropic/_models.py @@ -46,6 +46,7 @@ strip_not_given, extract_type_arg, is_annotated_type, + is_type_alias_type, strip_annotated_type, ) from ._compat import ( @@ -444,6 +445,8 @@ def construct_type(*, value: object, type_: object) -> object: # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) + if is_type_alias_type(type_): + type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` if is_annotated_type(type_): diff --git a/src/anthropic/_response.py b/src/anthropic/_response.py index a563e25a..a6b95999 100644 --- a/src/anthropic/_response.py +++ b/src/anthropic/_response.py @@ -25,7 +25,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base from ._models import BaseModel, is_basemodel, add_request_id from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -127,9 +127,15 @@ def __repr__(self) -> str: ) def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) cast_to = to if to is not None else self._cast_to origin = get_origin(cast_to) or cast_to @@ -189,16 +195,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) diff --git a/src/anthropic/_utils/__init__.py b/src/anthropic/_utils/__init__.py index a7cff3c0..d4fda26f 100644 --- a/src/anthropic/_utils/__init__.py +++ b/src/anthropic/_utils/__init__.py @@ -39,6 +39,7 @@ is_iterable_type as is_iterable_type, is_required_type as is_required_type, is_annotated_type as is_annotated_type, + is_type_alias_type as is_type_alias_type, strip_annotated_type as strip_annotated_type, extract_type_var_from_base as extract_type_var_from_base, ) diff --git a/src/anthropic/_utils/_typing.py b/src/anthropic/_utils/_typing.py index c036991f..278749b1 100644 --- a/src/anthropic/_utils/_typing.py +++ b/src/anthropic/_utils/_typing.py @@ -1,8 +1,17 @@ from __future__ import annotations +import sys +import typing +import typing_extensions from typing import Any, TypeVar, Iterable, cast from collections import abc as _c_abc -from typing_extensions import Required, Annotated, get_args, get_origin +from typing_extensions import ( + TypeIs, + 
Required, + Annotated, + get_args, + get_origin, +) from .._types import InheritsGeneric from .._compat import is_union as _is_union @@ -36,6 +45,26 @@ def is_typevar(typ: type) -> bool: return type(typ) == TypeVar # type: ignore +_TYPE_ALIAS_TYPES: tuple[type[typing_extensions.TypeAliasType], ...] = (typing_extensions.TypeAliasType,) +if sys.version_info >= (3, 12): + _TYPE_ALIAS_TYPES = (*_TYPE_ALIAS_TYPES, typing.TypeAliasType) + + +def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]: + """Return whether the provided argument is an instance of `TypeAliasType`. + + ```python + type Int = int + is_type_alias_type(Int) + # > True + Str = TypeAliasType("Str", str) + is_type_alias_type(Str) + # > True + ``` + """ + return isinstance(tp, _TYPE_ALIAS_TYPES) + + # Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] def strip_annotated_type(typ: type) -> type: if is_required_type(typ) or is_annotated_type(typ): diff --git a/tests/test_models.py b/tests/test_models.py index 278485c2..fe6f9199 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,7 @@ import json from typing import Any, Dict, List, Union, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAliasType import pytest import pydantic @@ -828,3 +828,19 @@ class B(BaseModel): # if the discriminator details object stays the same between invocations then # we hit the cache assert UnionType.__discriminator__ is discriminator + + +@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1") +def test_type_alias_type() -> None: + Alias = TypeAliasType("Alias", str) + + class Model(BaseModel): + alias: Alias + union: Union[int, Alias] + + m = construct_type(value={"alias": "foo", "union": "bar"}, type_=Model) + assert isinstance(m, Model) + assert isinstance(m.alias, str) + assert m.alias == "foo" + assert isinstance(m.union, str) + assert m.union == "bar" diff --git a/tests/utils.py b/tests/utils.py index a71253f5..cfc340ad 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,6 +16,7 @@ is_union_type, extract_type_arg, is_annotated_type, + is_type_alias_type, ) from anthropic._compat import PYDANTIC_V2, field_outer_type, get_model_fields from anthropic._models import BaseModel @@ -51,6 +52,9 @@ def assert_matches_type( path: list[str], allow_none: bool = False, ) -> None: + if is_type_alias_type(type_): + type_ = type_.__value__ + # unwrap `Annotated[T, ...]` -> `T` if is_annotated_type(type_): type_ = extract_type_arg(type_, 0) From 370d23c50116d90659a459769b2c2f501cbd2e1e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:14:59 +0000 Subject: [PATCH 09/18] chore(internal): remove some duplicated imports (#788) --- src/anthropic/resources/beta/beta.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/anthropic/resources/beta/beta.py b/src/anthropic/resources/beta/beta.py index badb104c..fbff30fa 100644 --- a/src/anthropic/resources/beta/beta.py +++ b/src/anthropic/resources/beta/beta.py @@ -2,7 +2,9 @@ from __future__ import annotations -from .messages import ( +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .messages.messages import ( Messages, AsyncMessages, MessagesWithRawResponse, @@ -10,9 +12,7 @@ MessagesWithStreamingResponse, AsyncMessagesWithStreamingResponse, ) 
-from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from .prompt_caching import ( +from .prompt_caching.prompt_caching import ( PromptCaching, AsyncPromptCaching, PromptCachingWithRawResponse, @@ -20,8 +20,6 @@ PromptCachingWithStreamingResponse, AsyncPromptCachingWithStreamingResponse, ) -from .messages.messages import Messages, AsyncMessages -from .prompt_caching.prompt_caching import PromptCaching, AsyncPromptCaching __all__ = ["Beta", "AsyncBeta"] From 141da6fa178070f702089478828a0869585ebfa8 Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Fri, 13 Dec 2024 12:29:55 +0000 Subject: [PATCH 10/18] fix(internal): correct support for TypeAliasType --- src/anthropic/_legacy_response.py | 1 - src/anthropic/_response.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/anthropic/_legacy_response.py b/src/anthropic/_legacy_response.py index f336c6f7..98ce2972 100644 --- a/src/anthropic/_legacy_response.py +++ b/src/anthropic/_legacy_response.py @@ -207,7 +207,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to and is_annotated_type(cast_to): cast_to = extract_type_arg(cast_to, 0) - cast_to = to if to is not None else self._cast_to origin = get_origin(cast_to) or cast_to if inspect.isclass(origin): diff --git a/src/anthropic/_response.py b/src/anthropic/_response.py index a6b95999..8734b1d5 100644 --- a/src/anthropic/_response.py +++ b/src/anthropic/_response.py @@ -137,7 +137,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to and is_annotated_type(cast_to): cast_to = extract_type_arg(cast_to, 0) - cast_to = to if to is not None else self._cast_to origin = get_origin(cast_to) or cast_to if inspect.isclass(origin): From 7369660c5c783fe23acf5ffee146921824ef5ed8 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:01:38 +0000 Subject: [PATCH 11/18] chore(internal): updated imports (#789) --- src/anthropic/_client.py | 53 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py index 8885e519..e2eb27c4 100644 --- a/src/anthropic/_client.py +++ b/src/anthropic/_client.py @@ -8,7 +8,7 @@ import httpx -from . import resources, _constants, _exceptions +from . 
import _constants, _exceptions from ._qs import Querystring from ._types import ( NOT_GIVEN, @@ -25,6 +25,7 @@ get_async_library, ) from ._version import __version__ +from .resources import messages, completions from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError from ._base_client import ( @@ -32,13 +33,13 @@ SyncAPIClient, AsyncAPIClient, ) +from .resources.beta import beta __all__ = [ "Timeout", "Transport", "ProxiesTypes", "RequestOptions", - "resources", "Anthropic", "AsyncAnthropic", "Client", @@ -47,9 +48,9 @@ class Anthropic(SyncAPIClient): - completions: resources.Completions - messages: resources.Messages - beta: resources.Beta + completions: completions.Completions + messages: messages.Messages + beta: beta.Beta with_raw_response: AnthropicWithRawResponse with_streaming_response: AnthropicWithStreamedResponse @@ -117,9 +118,9 @@ def __init__( self._default_stream_cls = Stream - self.completions = resources.Completions(self) - self.messages = resources.Messages(self) - self.beta = resources.Beta(self) + self.completions = completions.Completions(self) + self.messages = messages.Messages(self) + self.beta = beta.Beta(self) self.with_raw_response = AnthropicWithRawResponse(self) self.with_streaming_response = AnthropicWithStreamedResponse(self) @@ -265,9 +266,9 @@ def _make_status_error( class AsyncAnthropic(AsyncAPIClient): - completions: resources.AsyncCompletions - messages: resources.AsyncMessages - beta: resources.AsyncBeta + completions: completions.AsyncCompletions + messages: messages.AsyncMessages + beta: beta.AsyncBeta with_raw_response: AsyncAnthropicWithRawResponse with_streaming_response: AsyncAnthropicWithStreamedResponse @@ -335,9 +336,9 @@ def __init__( self._default_stream_cls = AsyncStream - self.completions = resources.AsyncCompletions(self) - self.messages = resources.AsyncMessages(self) - self.beta = resources.AsyncBeta(self) + self.completions = completions.AsyncCompletions(self) + self.messages = messages.AsyncMessages(self) + self.beta = beta.AsyncBeta(self) self.with_raw_response = AsyncAnthropicWithRawResponse(self) self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self) @@ -484,30 +485,30 @@ def _make_status_error( class AnthropicWithRawResponse: def __init__(self, client: Anthropic) -> None: - self.completions = resources.CompletionsWithRawResponse(client.completions) - self.messages = resources.MessagesWithRawResponse(client.messages) - self.beta = resources.BetaWithRawResponse(client.beta) + self.completions = completions.CompletionsWithRawResponse(client.completions) + self.messages = messages.MessagesWithRawResponse(client.messages) + self.beta = beta.BetaWithRawResponse(client.beta) class AsyncAnthropicWithRawResponse: def __init__(self, client: AsyncAnthropic) -> None: - self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) - self.messages = resources.AsyncMessagesWithRawResponse(client.messages) - self.beta = resources.AsyncBetaWithRawResponse(client.beta) + self.completions = completions.AsyncCompletionsWithRawResponse(client.completions) + self.messages = messages.AsyncMessagesWithRawResponse(client.messages) + self.beta = beta.AsyncBetaWithRawResponse(client.beta) class AnthropicWithStreamedResponse: def __init__(self, client: Anthropic) -> None: - self.completions = resources.CompletionsWithStreamingResponse(client.completions) - self.messages = resources.MessagesWithStreamingResponse(client.messages) - self.beta = 
resources.BetaWithStreamingResponse(client.beta) + self.completions = completions.CompletionsWithStreamingResponse(client.completions) + self.messages = messages.MessagesWithStreamingResponse(client.messages) + self.beta = beta.BetaWithStreamingResponse(client.beta) class AsyncAnthropicWithStreamedResponse: def __init__(self, client: AsyncAnthropic) -> None: - self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions) - self.messages = resources.AsyncMessagesWithStreamingResponse(client.messages) - self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) + self.completions = completions.AsyncCompletionsWithStreamingResponse(client.completions) + self.messages = messages.AsyncMessagesWithStreamingResponse(client.messages) + self.beta = beta.AsyncBetaWithStreamingResponse(client.beta) Client = Anthropic From 76ab5ae025f113f5a8a4d119f55fb17bfbef96af Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:53:59 +0000 Subject: [PATCH 12/18] docs(readme): example snippet for client context manager (#791) --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 5a61b0c1..611d1fec 100644 --- a/README.md +++ b/README.md @@ -682,6 +682,16 @@ client.with_options(http_client=DefaultHttpxClient(...)) By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. +```py +from anthropic import Anthropic + +with Anthropic() as client: + # make requests here + ... + +# HTTP client is now closed +``` + ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: From 35409873f1f0f17e74297f70d84105488e3f0779 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:04:56 +0000 Subject: [PATCH 13/18] chore(api): update spec version (#792) --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 2d482a8b..7068b43a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 10 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-cf96271afd65b396dc71c4e897d1d55a9526c122e3890bc87b5ff33316c93853.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-4e7eb85acf62b7d64bd47c668ba18355019b71f7f7a497698987ec2246ed74f4.yml From df6f2106bc11018defef713681f4af97647eaf15 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:54:23 +0000 Subject: [PATCH 14/18] chore(internal): update spec (#793) --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 7068b43a..e550e0bd 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 10 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-4e7eb85acf62b7d64bd47c668ba18355019b71f7f7a497698987ec2246ed74f4.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-73443ebfebee64b8ec0ebbacd2521d6b6aa900e9526ec97abdcbcff0c0955d9b.yml From 
4e0b15e22fe40e9aa513459564f641bf97c90954 Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Tue, 17 Dec 2024 11:22:53 +0000 Subject: [PATCH 15/18] feat(api): general availability updates The following APIs are now GA and have been moved out of the beta namespace: - Prompt caching - Token counting - PDF Support - The Batch API This commit also adds new endpoints for listing available models. https://docs.anthropic.com/en/release-notes/api --- .stats.yml | 4 +- api.md | 105 +- src/anthropic/_client.py | 11 +- src/anthropic/lib/streaming/__init__.py | 6 - .../_prompt_caching_beta_messages.py | 423 ---- .../streaming/_prompt_caching_beta_types.py | 32 - src/anthropic/resources/__init__.py | 14 + src/anthropic/resources/beta/__init__.py | 28 +- src/anthropic/resources/beta/beta.py | 64 +- .../resources/beta/messages/batches.py | 4 +- src/anthropic/resources/beta/models.py | 300 +++ .../resources/beta/prompt_caching/messages.py | 1954 ----------------- .../beta/prompt_caching/prompt_caching.py | 102 - .../prompt_caching => messages}/__init__.py | 30 +- src/anthropic/resources/messages/batches.py | 618 ++++++ .../resources/{ => messages}/messages.py | 517 ++++- src/anthropic/resources/models.py | 300 +++ src/anthropic/types/__init__.py | 22 + .../types/base64_pdf_source_param.py | 23 + src/anthropic/types/beta/__init__.py | 2 + src/anthropic/types/beta/beta_model_info.py | 28 + .../beta_raw_content_block_delta_event.py | 5 +- .../types/beta/messages/batch_list_params.py | 2 +- src/anthropic/types/beta/model_list_params.py | 27 + .../types/beta/prompt_caching/__init__.py | 26 - .../prompt_caching_beta_image_block_param.py | 32 - .../prompt_caching_beta_message.py | 109 - .../prompt_caching_beta_message_param.py | 33 - .../prompt_caching_beta_text_block_param.py | 18 - .../prompt_caching_beta_tool_param.py | 45 - ...pt_caching_beta_tool_result_block_param.py | 26 - ...rompt_caching_beta_tool_use_block_param.py | 22 - .../prompt_caching_beta_usage.py | 21 - ...prompt_caching_beta_message_start_event.py | 14 - ...rompt_caching_beta_message_stream_event.py | 26 - src/anthropic/types/beta_billing_error.py | 13 + src/anthropic/types/beta_error.py | 4 + .../types/beta_gateway_timeout_error.py | 13 + ...am.py => cache_control_ephemeral_param.py} | 4 +- src/anthropic/types/content_block_param.py | 5 +- src/anthropic/types/document_block_param.py | 19 + src/anthropic/types/image_block_param.py | 5 +- ...rams.py => message_count_tokens_params.py} | 138 +- src/anthropic/types/message_param.py | 13 +- src/anthropic/types/message_tokens_count.py | 14 + src/anthropic/types/messages/__init__.py | 14 + .../types/messages/batch_create_params.py | 36 + .../types/messages/batch_list_params.py | 27 + src/anthropic/types/messages/message_batch.py | 77 + .../messages/message_batch_canceled_result.py | 11 + .../messages/message_batch_errored_result.py | 14 + .../messages/message_batch_expired_result.py | 11 + .../message_batch_individual_response.py | 26 + .../messages/message_batch_request_counts.py | 35 + .../types/messages/message_batch_result.py | 19 + .../message_batch_succeeded_result.py | 14 + src/anthropic/types/model_info.py | 28 + src/anthropic/types/model_list_params.py | 27 + .../types/raw_content_block_delta_event.py | 5 +- src/anthropic/types/shared/__init__.py | 13 + .../types/shared/api_error_object.py | 13 + .../types/shared/authentication_error.py | 13 + src/anthropic/types/shared/billing_error.py | 13 + src/anthropic/types/shared/error_object.py | 32 + src/anthropic/types/shared/error_response.py | 14 
+ .../types/shared/gateway_timeout_error.py | 13 + .../types/shared/invalid_request_error.py | 13 + src/anthropic/types/shared/not_found_error.py | 13 + .../types/shared/overloaded_error.py | 13 + .../types/shared/permission_error.py | 13 + .../types/shared/rate_limit_error.py | 13 + src/anthropic/types/text_block_param.py | 5 + src/anthropic/types/tool_param.py | 4 + .../types/tool_result_block_param.py | 5 +- src/anthropic/types/tool_use_block_param.py | 5 + src/anthropic/types/usage.py | 7 + .../beta/prompt_caching/test_messages.py | 442 ---- tests/api_resources/beta/test_models.py | 167 ++ .../prompt_caching => messages}/__init__.py | 0 tests/api_resources/messages/test_batches.py | 469 ++++ tests/api_resources/test_messages.py | 199 +- tests/api_resources/test_models.py | 167 ++ 82 files changed, 3631 insertions(+), 3580 deletions(-) delete mode 100644 src/anthropic/lib/streaming/_prompt_caching_beta_messages.py delete mode 100644 src/anthropic/lib/streaming/_prompt_caching_beta_types.py create mode 100644 src/anthropic/resources/beta/models.py delete mode 100644 src/anthropic/resources/beta/prompt_caching/messages.py delete mode 100644 src/anthropic/resources/beta/prompt_caching/prompt_caching.py rename src/anthropic/resources/{beta/prompt_caching => messages}/__init__.py (52%) create mode 100644 src/anthropic/resources/messages/batches.py rename src/anthropic/resources/{ => messages}/messages.py (80%) create mode 100644 src/anthropic/resources/models.py create mode 100644 src/anthropic/types/base64_pdf_source_param.py create mode 100644 src/anthropic/types/beta/beta_model_info.py create mode 100644 src/anthropic/types/beta/model_list_params.py delete mode 100644 src/anthropic/types/beta/prompt_caching/__init__.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py delete mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py delete mode 100644 src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py delete mode 100644 src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py create mode 100644 src/anthropic/types/beta_billing_error.py create mode 100644 src/anthropic/types/beta_gateway_timeout_error.py rename src/anthropic/types/{beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py => cache_control_ephemeral_param.py} (62%) create mode 100644 src/anthropic/types/document_block_param.py rename src/anthropic/types/{beta/prompt_caching/message_create_params.py => message_count_tokens_params.py} (55%) create mode 100644 src/anthropic/types/message_tokens_count.py create mode 100644 src/anthropic/types/messages/__init__.py create mode 100644 src/anthropic/types/messages/batch_create_params.py create mode 100644 src/anthropic/types/messages/batch_list_params.py create mode 100644 src/anthropic/types/messages/message_batch.py create mode 100644 
src/anthropic/types/messages/message_batch_canceled_result.py create mode 100644 src/anthropic/types/messages/message_batch_errored_result.py create mode 100644 src/anthropic/types/messages/message_batch_expired_result.py create mode 100644 src/anthropic/types/messages/message_batch_individual_response.py create mode 100644 src/anthropic/types/messages/message_batch_request_counts.py create mode 100644 src/anthropic/types/messages/message_batch_result.py create mode 100644 src/anthropic/types/messages/message_batch_succeeded_result.py create mode 100644 src/anthropic/types/model_info.py create mode 100644 src/anthropic/types/model_list_params.py create mode 100644 src/anthropic/types/shared/__init__.py create mode 100644 src/anthropic/types/shared/api_error_object.py create mode 100644 src/anthropic/types/shared/authentication_error.py create mode 100644 src/anthropic/types/shared/billing_error.py create mode 100644 src/anthropic/types/shared/error_object.py create mode 100644 src/anthropic/types/shared/error_response.py create mode 100644 src/anthropic/types/shared/gateway_timeout_error.py create mode 100644 src/anthropic/types/shared/invalid_request_error.py create mode 100644 src/anthropic/types/shared/not_found_error.py create mode 100644 src/anthropic/types/shared/overloaded_error.py create mode 100644 src/anthropic/types/shared/permission_error.py create mode 100644 src/anthropic/types/shared/rate_limit_error.py delete mode 100644 tests/api_resources/beta/prompt_caching/test_messages.py create mode 100644 tests/api_resources/beta/test_models.py rename tests/api_resources/{beta/prompt_caching => messages}/__init__.py (100%) create mode 100644 tests/api_resources/messages/test_batches.py create mode 100644 tests/api_resources/test_models.py diff --git a/.stats.yml b/.stats.yml index e550e0bd..19e9daeb 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 10 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-73443ebfebee64b8ec0ebbacd2521d6b6aa900e9526ec97abdcbcff0c0955d9b.yml +configured_endpoints: 19 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-be055148d227480fcacc9086c37ac8009dcb487731069ada51af35044f65bee4.yml diff --git a/api.md b/api.md index 1ebd122d..7669870a 100644 --- a/api.md +++ b/api.md @@ -1,14 +1,35 @@ +# Shared Types + +```python +from anthropic.types import ( + APIErrorObject, + AuthenticationError, + BillingError, + ErrorObject, + ErrorResponse, + GatewayTimeoutError, + InvalidRequestError, + NotFoundError, + OverloadedError, + PermissionError, + RateLimitError, +) +``` + # Messages Types: ```python from anthropic.types import ( + Base64PDFSource, + CacheControlEphemeral, ContentBlock, ContentBlockDeltaEvent, ContentBlockParam, ContentBlockStartEvent, ContentBlockStopEvent, + DocumentBlockParam, ImageBlockParam, InputJSONDelta, Message, @@ -18,6 +39,7 @@ from anthropic.types import ( MessageStartEvent, MessageStopEvent, MessageStreamEvent, + MessageTokensCount, Metadata, Model, RawContentBlockDeltaEvent, @@ -44,8 +66,47 @@ from anthropic.types import ( Methods: -- client.messages.create(\*\*params) -> Message +- client.messages.create(\*\*params) -> Message - client.messages.stream(\*args) -> MessageStreamManager[MessageStream] | MessageStreamManager[MessageStreamT] +- client.messages.count_tokens(\*\*params) -> MessageTokensCount + +## Batches + +Types: + +```python +from anthropic.types.messages import ( + MessageBatch, + MessageBatchCanceledResult, + 
MessageBatchErroredResult, + MessageBatchExpiredResult, + MessageBatchIndividualResponse, + MessageBatchRequestCounts, + MessageBatchResult, + MessageBatchSucceededResult, +) +``` + +Methods: + +- client.messages.batches.create(\*\*params) -> MessageBatch +- client.messages.batches.retrieve(message_batch_id) -> MessageBatch +- client.messages.batches.list(\*\*params) -> SyncPage[MessageBatch] +- client.messages.batches.cancel(message_batch_id) -> MessageBatch +- client.messages.batches.results(message_batch_id) -> BinaryAPIResponse + +# Models + +Types: + +```python +from anthropic.types import ModelInfo +``` + +Methods: + +- client.models.retrieve(model_id) -> ModelInfo +- client.models.list(\*\*params) -> SyncPage[ModelInfo] # Beta @@ -56,8 +117,10 @@ from anthropic.types import ( AnthropicBeta, BetaAPIError, BetaAuthenticationError, + BetaBillingError, BetaError, BetaErrorResponse, + BetaGatewayTimeoutError, BetaInvalidRequestError, BetaNotFoundError, BetaOverloadedError, @@ -66,6 +129,19 @@ from anthropic.types import ( ) ``` +## Models + +Types: + +```python +from anthropic.types.beta import BetaModelInfo +``` + +Methods: + +- client.beta.models.retrieve(model_id) -> BetaModelInfo +- client.beta.models.list(\*\*params) -> SyncPage[BetaModelInfo] + ## Messages Types: @@ -139,30 +215,3 @@ Methods: - client.beta.messages.batches.list(\*\*params) -> SyncPage[BetaMessageBatch] - client.beta.messages.batches.cancel(message_batch_id) -> BetaMessageBatch - client.beta.messages.batches.results(message_batch_id) -> BinaryAPIResponse - -## PromptCaching - -### Messages - -Types: - -```python -from anthropic.types.beta.prompt_caching import ( - PromptCachingBetaCacheControlEphemeral, - PromptCachingBetaImageBlockParam, - PromptCachingBetaMessage, - PromptCachingBetaMessageParam, - PromptCachingBetaTextBlockParam, - PromptCachingBetaTool, - PromptCachingBetaToolResultBlockParam, - PromptCachingBetaToolUseBlockParam, - PromptCachingBetaUsage, - RawPromptCachingBetaMessageStartEvent, - RawPromptCachingBetaMessageStreamEvent, -) -``` - -Methods: - -- client.beta.prompt_caching.messages.create(\*\*params) -> PromptCachingBetaMessage -- client.beta.prompt_caching.messages.stream(\*\*params) -> PromptCachingBetaMessageStreamManager diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py index e2eb27c4..8bf77861 100644 --- a/src/anthropic/_client.py +++ b/src/anthropic/_client.py @@ -25,7 +25,7 @@ get_async_library, ) from ._version import __version__ -from .resources import messages, completions +from .resources import models, completions from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError from ._base_client import ( @@ -34,6 +34,7 @@ AsyncAPIClient, ) from .resources.beta import beta +from .resources.messages import messages __all__ = [ "Timeout", @@ -50,6 +51,7 @@ class Anthropic(SyncAPIClient): completions: completions.Completions messages: messages.Messages + models: models.Models beta: beta.Beta with_raw_response: AnthropicWithRawResponse with_streaming_response: AnthropicWithStreamedResponse @@ -120,6 +122,7 @@ def __init__( self.completions = completions.Completions(self) self.messages = messages.Messages(self) + self.models = models.Models(self) self.beta = beta.Beta(self) self.with_raw_response = AnthropicWithRawResponse(self) self.with_streaming_response = AnthropicWithStreamedResponse(self) @@ -268,6 +271,7 @@ def _make_status_error( class AsyncAnthropic(AsyncAPIClient): completions: completions.AsyncCompletions messages: 
messages.AsyncMessages + models: models.AsyncModels beta: beta.AsyncBeta with_raw_response: AsyncAnthropicWithRawResponse with_streaming_response: AsyncAnthropicWithStreamedResponse @@ -338,6 +342,7 @@ def __init__( self.completions = completions.AsyncCompletions(self) self.messages = messages.AsyncMessages(self) + self.models = models.AsyncModels(self) self.beta = beta.AsyncBeta(self) self.with_raw_response = AsyncAnthropicWithRawResponse(self) self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self) @@ -487,6 +492,7 @@ class AnthropicWithRawResponse: def __init__(self, client: Anthropic) -> None: self.completions = completions.CompletionsWithRawResponse(client.completions) self.messages = messages.MessagesWithRawResponse(client.messages) + self.models = models.ModelsWithRawResponse(client.models) self.beta = beta.BetaWithRawResponse(client.beta) @@ -494,6 +500,7 @@ class AsyncAnthropicWithRawResponse: def __init__(self, client: AsyncAnthropic) -> None: self.completions = completions.AsyncCompletionsWithRawResponse(client.completions) self.messages = messages.AsyncMessagesWithRawResponse(client.messages) + self.models = models.AsyncModelsWithRawResponse(client.models) self.beta = beta.AsyncBetaWithRawResponse(client.beta) @@ -501,6 +508,7 @@ class AnthropicWithStreamedResponse: def __init__(self, client: Anthropic) -> None: self.completions = completions.CompletionsWithStreamingResponse(client.completions) self.messages = messages.MessagesWithStreamingResponse(client.messages) + self.models = models.ModelsWithStreamingResponse(client.models) self.beta = beta.BetaWithStreamingResponse(client.beta) @@ -508,6 +516,7 @@ class AsyncAnthropicWithStreamedResponse: def __init__(self, client: AsyncAnthropic) -> None: self.completions = completions.AsyncCompletionsWithStreamingResponse(client.completions) self.messages = messages.AsyncMessagesWithStreamingResponse(client.messages) + self.models = models.AsyncModelsWithStreamingResponse(client.models) self.beta = beta.AsyncBetaWithStreamingResponse(client.beta) diff --git a/src/anthropic/lib/streaming/__init__.py b/src/anthropic/lib/streaming/__init__.py index fbd25b02..0ab41209 100644 --- a/src/anthropic/lib/streaming/__init__.py +++ b/src/anthropic/lib/streaming/__init__.py @@ -11,9 +11,3 @@ MessageStreamManager as MessageStreamManager, AsyncMessageStreamManager as AsyncMessageStreamManager, ) -from ._prompt_caching_beta_messages import ( - PromptCachingBetaMessageStream as PromptCachingBetaMessageStream, - AsyncPromptCachingBetaMessageStream as AsyncPromptCachingBetaMessageStream, - PromptCachingBetaMessageStreamManager as PromptCachingBetaMessageStreamManager, - AsyncPromptCachingBetaMessageStreamManager as AsyncPromptCachingBetaMessageStreamManager, -) diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py deleted file mode 100644 index df727ea8..00000000 --- a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py +++ /dev/null @@ -1,423 +0,0 @@ -from __future__ import annotations - -from types import TracebackType -from typing import TYPE_CHECKING, Any, Callable, cast -from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never - -import httpx - -from ...types import ContentBlock -from ..._utils import consume_sync_iterator, consume_async_iterator -from ..._models import build, construct_type -from ..._streaming import Stream, AsyncStream -from ._prompt_caching_beta_types import ( - TextEvent, - InputJsonEvent, - 
MessageStopEvent, - ContentBlockStopEvent, - PromptCachingBetaMessageStreamEvent, -) -from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStreamEvent - -if TYPE_CHECKING: - from ..._client import Anthropic, AsyncAnthropic - - -class PromptCachingBetaMessageStream: - text_stream: Iterator[str] - """Iterator over just the text deltas in the stream. - - ```py - for text in stream.text_stream: - print(text, end="", flush=True) - print() - ``` - """ - - response: httpx.Response - - def __init__( - self, - *, - cast_to: type[RawPromptCachingBetaMessageStreamEvent], - response: httpx.Response, - client: Anthropic, - ) -> None: - self.response = response - self._cast_to = cast_to - self._client = client - - self.text_stream = self.__stream_text__() - self.__final_message_snapshot: PromptCachingBetaMessage | None = None - - self._iterator = self.__stream__() - self._raw_stream: Stream[RawPromptCachingBetaMessageStreamEvent] = Stream( - cast_to=cast_to, response=response, client=client - ) - - def __next__(self) -> PromptCachingBetaMessageStreamEvent: - return self._iterator.__next__() - - def __iter__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]: - for item in self._iterator: - yield item - - def __enter__(self) -> Self: - return self - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - self.close() - - def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called if the response body is read to completion. - """ - self.response.close() - - def get_final_message(self) -> PromptCachingBetaMessage: - """Waits until the stream has been read to completion and returns - the accumulated `PromptCachingBetaMessage` object. - """ - self.until_done() - assert self.__final_message_snapshot is not None - return self.__final_message_snapshot - - def get_final_text(self) -> str: - """Returns all `text` content blocks concatenated together. - - > [!NOTE] - > Currently the API will only respond with a single content block. - - Will raise an error if no `text` content blocks were returned. - """ - message = self.get_final_message() - text_blocks: list[str] = [] - for block in message.content: - if block.type == "text": - text_blocks.append(block.text) - - if not text_blocks: - raise RuntimeError("Expected to have received at least 1 text block") - - return "".join(text_blocks) - - def until_done(self) -> None: - """Blocks until the stream has been consumed""" - consume_sync_iterator(self) - - # properties - @property - def current_message_snapshot(self) -> PromptCachingBetaMessage: - assert self.__final_message_snapshot is not None - return self.__final_message_snapshot - - def __stream__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]: - for sse_event in self._raw_stream: - self.__final_message_snapshot = accumulate_event( - event=sse_event, - current_snapshot=self.__final_message_snapshot, - ) - - events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot) - for event in events_to_fire: - yield event - - def __stream_text__(self) -> Iterator[str]: - for chunk in self: - if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": - yield chunk.delta.text - - -class PromptCachingBetaMessageStreamManager: - """Wrapper over PromptCachingBetaMessageStream that is returned by `.stream()`. - - ```py - with client.beta.prompt_caching.messages.stream(...) 
as stream: - for chunk in stream: - ... - ``` - """ - - def __init__( - self, - api_request: Callable[[], Stream[RawPromptCachingBetaMessageStreamEvent]], - ) -> None: - self.__stream: PromptCachingBetaMessageStream | None = None - self.__api_request = api_request - - def __enter__(self) -> PromptCachingBetaMessageStream: - raw_stream = self.__api_request() - - self.__stream = PromptCachingBetaMessageStream( - cast_to=raw_stream._cast_to, - response=raw_stream.response, - client=raw_stream._client, - ) - - return self.__stream - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - self.__stream.close() - - -class AsyncPromptCachingBetaMessageStream: - text_stream: AsyncIterator[str] - """Async iterator over just the text deltas in the stream. - - ```py - async for text in stream.text_stream: - print(text, end="", flush=True) - print() - ``` - """ - - response: httpx.Response - - def __init__( - self, - *, - cast_to: type[RawPromptCachingBetaMessageStreamEvent], - response: httpx.Response, - client: AsyncAnthropic, - ) -> None: - self.response = response - self._cast_to = cast_to - self._client = client - - self.text_stream = self.__stream_text__() - self.__final_message_snapshot: PromptCachingBetaMessage | None = None - - self._iterator = self.__stream__() - self._raw_stream: AsyncStream[RawPromptCachingBetaMessageStreamEvent] = AsyncStream( - cast_to=cast_to, response=response, client=client - ) - - async def __anext__(self) -> PromptCachingBetaMessageStreamEvent: - return await self._iterator.__anext__() - - async def __aiter__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]: - async for item in self._iterator: - yield item - - async def __aenter__(self) -> Self: - return self - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - await self.close() - - async def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called if the response body is read to completion. - """ - await self.response.aclose() - - async def get_final_message(self) -> PromptCachingBetaMessage: - """Waits until the stream has been read to completion and returns - the accumulated `PromptCachingBetaMessage` object. - """ - await self.until_done() - assert self.__final_message_snapshot is not None - return self.__final_message_snapshot - - async def get_final_text(self) -> str: - """Returns all `text` content blocks concatenated together. - - > [!NOTE] - > Currently the API will only respond with a single content block. - - Will raise an error if no `text` content blocks were returned. 
- """ - message = await self.get_final_message() - text_blocks: list[str] = [] - for block in message.content: - if block.type == "text": - text_blocks.append(block.text) - - if not text_blocks: - raise RuntimeError("Expected to have received at least 1 text block") - - return "".join(text_blocks) - - async def until_done(self) -> None: - """Waits until the stream has been consumed""" - await consume_async_iterator(self) - - # properties - @property - def current_message_snapshot(self) -> PromptCachingBetaMessage: - assert self.__final_message_snapshot is not None - return self.__final_message_snapshot - - async def __stream__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]: - async for sse_event in self._raw_stream: - self.__final_message_snapshot = accumulate_event( - event=sse_event, - current_snapshot=self.__final_message_snapshot, - ) - - events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot) - for event in events_to_fire: - yield event - - async def __stream_text__(self) -> AsyncIterator[str]: - async for chunk in self: - if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": - yield chunk.delta.text - - -class AsyncPromptCachingBetaMessageStreamManager: - """Wrapper over AsyncMessageStream that is returned by `.stream()` - so that an async context manager can be used without `await`ing the - original client call. - - ```py - async with client.messages.stream(...) as stream: - async for chunk in stream: - ... - ``` - """ - - def __init__( - self, - api_request: Awaitable[AsyncStream[RawPromptCachingBetaMessageStreamEvent]], - ) -> None: - self.__stream: AsyncPromptCachingBetaMessageStream | None = None - self.__api_request = api_request - - async def __aenter__(self) -> AsyncPromptCachingBetaMessageStream: - raw_stream = await self.__api_request - - self.__stream = AsyncPromptCachingBetaMessageStream( - cast_to=raw_stream._cast_to, - response=raw_stream.response, - client=raw_stream._client, - ) - - return self.__stream - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - await self.__stream.close() - - -def build_events( - *, - event: RawPromptCachingBetaMessageStreamEvent, - message_snapshot: PromptCachingBetaMessage, -) -> list[PromptCachingBetaMessageStreamEvent]: - events_to_fire: list[PromptCachingBetaMessageStreamEvent] = [] - - if event.type == "message_start": - events_to_fire.append(event) - elif event.type == "message_delta": - events_to_fire.append(event) - elif event.type == "message_stop": - events_to_fire.append(build(MessageStopEvent, type="message_stop", message=message_snapshot)) - elif event.type == "content_block_start": - events_to_fire.append(event) - elif event.type == "content_block_delta": - events_to_fire.append(event) - - content_block = message_snapshot.content[event.index] - if event.delta.type == "text_delta" and content_block.type == "text": - events_to_fire.append( - build( - TextEvent, - type="text", - text=event.delta.text, - snapshot=content_block.text, - ) - ) - elif event.delta.type == "input_json_delta" and content_block.type == "tool_use": - events_to_fire.append( - build( - InputJsonEvent, - type="input_json", - partial_json=event.delta.partial_json, - snapshot=content_block.input, - ) - ) - elif event.type == "content_block_stop": - content_block = message_snapshot.content[event.index] - - events_to_fire.append( - build(ContentBlockStopEvent, 
type="content_block_stop", index=event.index, content_block=content_block), - ) - else: - # we only want exhaustive checking for linters, not at runtime - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(event) - - return events_to_fire - - -JSON_BUF_PROPERTY = "__json_buf" - - -def accumulate_event( - *, - event: RawPromptCachingBetaMessageStreamEvent, - current_snapshot: PromptCachingBetaMessage | None, -) -> PromptCachingBetaMessage: - if current_snapshot is None: - if event.type == "message_start": - return PromptCachingBetaMessage.construct(**cast(Any, event.message.to_dict())) - - raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"') - - if event.type == "content_block_start": - # TODO: check index - current_snapshot.content.append( - cast( - ContentBlock, - construct_type(type_=ContentBlock, value=event.content_block.model_dump()), - ), - ) - elif event.type == "content_block_delta": - content = current_snapshot.content[event.index] - if content.type == "text" and event.delta.type == "text_delta": - content.text += event.delta.text - elif content.type == "tool_use" and event.delta.type == "input_json_delta": - from jiter import from_json - - # we need to keep track of the raw JSON string as well so that we can - # re-parse it for each delta, for now we just store it as an untyped - # property on the snapshot - json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b"")) - json_buf += bytes(event.delta.partial_json, "utf-8") - - if json_buf: - content.input = from_json(json_buf, partial_mode=True) - - setattr(content, JSON_BUF_PROPERTY, json_buf) - elif event.type == "message_delta": - current_snapshot.stop_reason = event.delta.stop_reason - current_snapshot.stop_sequence = event.delta.stop_sequence - current_snapshot.usage.output_tokens = event.usage.output_tokens - - return current_snapshot diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py deleted file mode 100644 index d8fdce52..00000000 --- a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py +++ /dev/null @@ -1,32 +0,0 @@ -from typing import Union -from typing_extensions import Literal - -from ._types import ( - TextEvent, - InputJsonEvent, - RawMessageDeltaEvent, - ContentBlockStopEvent, - RawContentBlockDeltaEvent, - RawContentBlockStartEvent, -) -from ...types import RawMessageStopEvent -from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStartEvent - - -class MessageStopEvent(RawMessageStopEvent): - type: Literal["message_stop"] - - message: PromptCachingBetaMessage - - -PromptCachingBetaMessageStreamEvent = Union[ - RawPromptCachingBetaMessageStartEvent, - MessageStopEvent, - # same as non-beta - TextEvent, - InputJsonEvent, - RawMessageDeltaEvent, - RawContentBlockStartEvent, - RawContentBlockDeltaEvent, - ContentBlockStopEvent, -] diff --git a/src/anthropic/resources/__init__.py b/src/anthropic/resources/__init__.py index 318d5cdd..ffff8855 100644 --- a/src/anthropic/resources/__init__.py +++ b/src/anthropic/resources/__init__.py @@ -8,6 +8,14 @@ BetaWithStreamingResponse, AsyncBetaWithStreamingResponse, ) +from .models import ( + Models, + AsyncModels, + ModelsWithRawResponse, + AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) from .messages import ( Messages, AsyncMessages, @@ -38,6 +46,12 @@ "AsyncMessagesWithRawResponse", "MessagesWithStreamingResponse", "AsyncMessagesWithStreamingResponse", + 
"Models", + "AsyncModels", + "ModelsWithRawResponse", + "AsyncModelsWithRawResponse", + "ModelsWithStreamingResponse", + "AsyncModelsWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", diff --git a/src/anthropic/resources/beta/__init__.py b/src/anthropic/resources/beta/__init__.py index d06a0802..82b343fa 100644 --- a/src/anthropic/resources/beta/__init__.py +++ b/src/anthropic/resources/beta/__init__.py @@ -8,6 +8,14 @@ BetaWithStreamingResponse, AsyncBetaWithStreamingResponse, ) +from .models import ( + Models, + AsyncModels, + ModelsWithRawResponse, + AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) from .messages import ( Messages, AsyncMessages, @@ -16,28 +24,20 @@ MessagesWithStreamingResponse, AsyncMessagesWithStreamingResponse, ) -from .prompt_caching import ( - PromptCaching, - AsyncPromptCaching, - PromptCachingWithRawResponse, - AsyncPromptCachingWithRawResponse, - PromptCachingWithStreamingResponse, - AsyncPromptCachingWithStreamingResponse, -) __all__ = [ + "Models", + "AsyncModels", + "ModelsWithRawResponse", + "AsyncModelsWithRawResponse", + "ModelsWithStreamingResponse", + "AsyncModelsWithStreamingResponse", "Messages", "AsyncMessages", "MessagesWithRawResponse", "AsyncMessagesWithRawResponse", "MessagesWithStreamingResponse", "AsyncMessagesWithStreamingResponse", - "PromptCaching", - "AsyncPromptCaching", - "PromptCachingWithRawResponse", - "AsyncPromptCachingWithRawResponse", - "PromptCachingWithStreamingResponse", - "AsyncPromptCachingWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", diff --git a/src/anthropic/resources/beta/beta.py b/src/anthropic/resources/beta/beta.py index fbff30fa..8293782d 100644 --- a/src/anthropic/resources/beta/beta.py +++ b/src/anthropic/resources/beta/beta.py @@ -2,6 +2,14 @@ from __future__ import annotations +from .models import ( + Models, + AsyncModels, + ModelsWithRawResponse, + AsyncModelsWithRawResponse, + ModelsWithStreamingResponse, + AsyncModelsWithStreamingResponse, +) from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from .messages.messages import ( @@ -12,26 +20,18 @@ MessagesWithStreamingResponse, AsyncMessagesWithStreamingResponse, ) -from .prompt_caching.prompt_caching import ( - PromptCaching, - AsyncPromptCaching, - PromptCachingWithRawResponse, - AsyncPromptCachingWithRawResponse, - PromptCachingWithStreamingResponse, - AsyncPromptCachingWithStreamingResponse, -) __all__ = ["Beta", "AsyncBeta"] class Beta(SyncAPIResource): @cached_property - def messages(self) -> Messages: - return Messages(self._client) + def models(self) -> Models: + return Models(self._client) @cached_property - def prompt_caching(self) -> PromptCaching: - return PromptCaching(self._client) + def messages(self) -> Messages: + return Messages(self._client) @cached_property def with_raw_response(self) -> BetaWithRawResponse: @@ -55,12 +55,12 @@ def with_streaming_response(self) -> BetaWithStreamingResponse: class AsyncBeta(AsyncAPIResource): @cached_property - def messages(self) -> AsyncMessages: - return AsyncMessages(self._client) + def models(self) -> AsyncModels: + return AsyncModels(self._client) @cached_property - def prompt_caching(self) -> AsyncPromptCaching: - return AsyncPromptCaching(self._client) + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) @cached_property def with_raw_response(self) -> AsyncBetaWithRawResponse: @@ -87,12 +87,12 @@ def __init__(self, beta: Beta) -> None: self._beta = 
beta @cached_property - def messages(self) -> MessagesWithRawResponse: - return MessagesWithRawResponse(self._beta.messages) + def models(self) -> ModelsWithRawResponse: + return ModelsWithRawResponse(self._beta.models) @cached_property - def prompt_caching(self) -> PromptCachingWithRawResponse: - return PromptCachingWithRawResponse(self._beta.prompt_caching) + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._beta.messages) class AsyncBetaWithRawResponse: @@ -100,12 +100,12 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def messages(self) -> AsyncMessagesWithRawResponse: - return AsyncMessagesWithRawResponse(self._beta.messages) + def models(self) -> AsyncModelsWithRawResponse: + return AsyncModelsWithRawResponse(self._beta.models) @cached_property - def prompt_caching(self) -> AsyncPromptCachingWithRawResponse: - return AsyncPromptCachingWithRawResponse(self._beta.prompt_caching) + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._beta.messages) class BetaWithStreamingResponse: @@ -113,12 +113,12 @@ def __init__(self, beta: Beta) -> None: self._beta = beta @cached_property - def messages(self) -> MessagesWithStreamingResponse: - return MessagesWithStreamingResponse(self._beta.messages) + def models(self) -> ModelsWithStreamingResponse: + return ModelsWithStreamingResponse(self._beta.models) @cached_property - def prompt_caching(self) -> PromptCachingWithStreamingResponse: - return PromptCachingWithStreamingResponse(self._beta.prompt_caching) + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._beta.messages) class AsyncBetaWithStreamingResponse: @@ -126,9 +126,9 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def messages(self) -> AsyncMessagesWithStreamingResponse: - return AsyncMessagesWithStreamingResponse(self._beta.messages) + def models(self) -> AsyncModelsWithStreamingResponse: + return AsyncModelsWithStreamingResponse(self._beta.models) @cached_property - def prompt_caching(self) -> AsyncPromptCachingWithStreamingResponse: - return AsyncPromptCachingWithStreamingResponse(self._beta.prompt_caching) + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._beta.messages) diff --git a/src/anthropic/resources/beta/messages/batches.py b/src/anthropic/resources/beta/messages/batches.py index 19a1f86f..4b070a04 100644 --- a/src/anthropic/resources/beta/messages/batches.py +++ b/src/anthropic/resources/beta/messages/batches.py @@ -183,7 +183,7 @@ def list( limit: Number of items to return per page. - Defaults to `20`. Ranges from `1` to `100`. + Defaults to `20`. Ranges from `1` to `1000`. betas: Optional header to specify the beta version(s) you want to use. @@ -500,7 +500,7 @@ def list( limit: Number of items to return per page. - Defaults to `20`. Ranges from `1` to `100`. + Defaults to `20`. Ranges from `1` to `1000`. betas: Optional header to specify the beta version(s) you want to use. diff --git a/src/anthropic/resources/beta/models.py b/src/anthropic/resources/beta/models.py new file mode 100644 index 00000000..fdad3298 --- /dev/null +++ b/src/anthropic/resources/beta/models.py @@ -0,0 +1,300 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage +from ...types.beta import model_list_params +from ..._base_client import AsyncPaginator, make_request_options +from ...types.beta.beta_model_info import BetaModelInfo + +__all__ = ["Models", "AsyncModels"] + + +class Models(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return ModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return ModelsWithStreamingResponse(self) + + def retrieve( + self, + model_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> BetaModelInfo: + """ + Get a specific model. + + The Models API response can be used to determine information about a specific + model or resolve a model alias to a model ID. + + Args: + model_id: Model identifier or alias. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model_id: + raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") + return self._get( + f"/v1/models/{model_id}?beta=true", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=BetaModelInfo, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[BetaModelInfo]: + """ + List available models. + + The Models API response can be used to determine which models are available for + use in the API. More recently released models are listed first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. 
+ + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/models?beta=true", + page=SyncPage[BetaModelInfo], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + model_list_params.ModelListParams, + ), + ), + model=BetaModelInfo, + ) + + +class AsyncModels(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return AsyncModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return AsyncModelsWithStreamingResponse(self) + + async def retrieve( + self, + model_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> BetaModelInfo: + """ + Get a specific model. + + The Models API response can be used to determine information about a specific + model or resolve a model alias to a model ID. + + Args: + model_id: Model identifier or alias. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model_id: + raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") + return await self._get( + f"/v1/models/{model_id}?beta=true", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=BetaModelInfo, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[BetaModelInfo, AsyncPage[BetaModelInfo]]: + """ + List available models. + + The Models API response can be used to determine which models are available for + use in the API. More recently released models are listed first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. + + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/models?beta=true", + page=AsyncPage[BetaModelInfo], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + model_list_params.ModelListParams, + ), + ), + model=BetaModelInfo, + ) + + +class ModelsWithRawResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = _legacy_response.to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + models.list, + ) + + +class AsyncModelsWithRawResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + models.list, + ) + + +class ModelsWithStreamingResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = to_streamed_response_wrapper( + models.retrieve, + ) + self.list = to_streamed_response_wrapper( + models.list, + ) + + +class AsyncModelsWithStreamingResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = async_to_streamed_response_wrapper( + models.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + models.list, + ) diff --git a/src/anthropic/resources/beta/prompt_caching/messages.py b/src/anthropic/resources/beta/prompt_caching/messages.py deleted file mode 100644 index 1a5dac35..00000000 --- a/src/anthropic/resources/beta/prompt_caching/messages.py +++ /dev/null @@ -1,1954 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union, Iterable -from functools import partial -from itertools import chain -from typing_extensions import Literal, overload - -import httpx - -from .... 
import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( - is_given, - required_args, - maybe_transform, - strip_not_given, - async_maybe_transform, -) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ...._constants import DEFAULT_TIMEOUT -from ...._streaming import Stream, AsyncStream -from ...._base_client import make_request_options -from ....lib.streaming import PromptCachingBetaMessageStreamManager, AsyncPromptCachingBetaMessageStreamManager -from ....types.model_param import ModelParam -from ....types.metadata_param import MetadataParam -from ....types.tool_choice_param import ToolChoiceParam -from ....types.beta.prompt_caching import message_create_params -from ....types.anthropic_beta_param import AnthropicBetaParam -from ....types.beta.prompt_caching.prompt_caching_beta_message import PromptCachingBetaMessage -from ....types.beta.prompt_caching.prompt_caching_beta_tool_param import PromptCachingBetaToolParam -from ....types.beta.prompt_caching.prompt_caching_beta_message_param import PromptCachingBetaMessageParam -from ....types.beta.prompt_caching.prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam -from ....types.beta.prompt_caching.raw_prompt_caching_beta_message_stream_event import ( - RawPromptCachingBetaMessageStreamEvent, -) - -__all__ = ["Messages", "AsyncMessages"] - - -class Messages(SyncAPIResource): - @cached_property - def with_raw_response(self) -> MessagesWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return the - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers - """ - return MessagesWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> MessagesWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response - """ - return MessagesWithStreamingResponse(self) - - @overload - def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. - - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. 
- - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. - - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. 
- - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - stream: Literal[True], - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[RawPromptCachingBetaMessageStreamEvent]: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. 
- - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. - - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. 
- - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. - - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. - - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - stream: bool, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. - - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. 
The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. - - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. 
- - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. - - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) - def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]: - if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: - timeout = 600 - extra_headers = { - **strip_not_given( - { - "anthropic-beta": ",".join(chain((str(e) for e in betas), ["prompt-caching-2024-07-31"])) - if is_given(betas) - else NOT_GIVEN - } - ), - **(extra_headers or {}), - } - extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})} - return self._post( - "/v1/messages?beta=prompt_caching", - body=maybe_transform( - { - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "metadata": metadata, - "stop_sequences": stop_sequences, - "stream": stream, - "system": system, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_k": top_k, - "top_p": top_p, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PromptCachingBetaMessage, - stream=stream or False, - stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent], - ) - - def stream( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessageStreamManager: - """Create a Message stream""" - if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: - timeout = 600 - - extra_headers = { - "anthropic-beta": "prompt-caching-2024-07-31", - "X-Stainless-Stream-Helper": "beta.prompt_caching.messages", - **(extra_headers or {}), - } - request = partial( - self._post, - "/v1/messages?beta=prompt_caching", - body=maybe_transform( - { - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "metadata": metadata, - "stop_sequences": stop_sequences, - "stream": True, - "system": system, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_k": top_k, - "top_p": top_p, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PromptCachingBetaMessage, - stream=True, - stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent], - ) - return PromptCachingBetaMessageStreamManager(request) - - -class AsyncMessages(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncMessagesWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return the - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers - """ - return AsyncMessagesWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response - """ - return AsyncMessagesWithStreamingResponse(self) - - @overload - async def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. 
- - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. - - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. - - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. 
If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. - - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. - - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. 
- - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - stream: Literal[True], - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[RawPromptCachingBetaMessageStreamEvent]: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. - - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" 
}, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. - - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. 
You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. - - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. - - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
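
This overload documents the `stream=True` form, which returns an `AsyncStream` of raw server-sent events rather than a completed message. A minimal consumption sketch, written against the top-level async Messages API under the assumption that the event shapes match the raw stream events described in the docstring (the model name is again an illustrative assumption):

```python
import asyncio

import anthropic

client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment


async def main() -> None:
    stream = await client.messages.create(
        model="claude-3-5-sonnet-latest",  # assumed model name, for illustration only
        max_tokens=1024,
        stream=True,
        messages=[{"role": "user", "content": "Hello, Claude"}],
    )
    async for event in stream:
        # Incremental text arrives in content_block_delta events as text_delta payloads.
        if event.type == "content_block_delta" and event.delta.type == "text_delta":
            print(event.delta.text, end="", flush=True)


asyncio.run(main())
```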
- - @overload - async def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - stream: bool, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]: - """ - Send a structured list of input messages with text and/or image content, and the - model will generate the next message in the conversation. - - The Messages API can be used for either single queries or stateless multi-turn - conversations. - - Args: - max_tokens: The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. - - messages: Input messages. - - Our models are trained to operate on alternating `user` and `assistant` - conversational turns. When creating a new `Message`, you specify the prior - conversational turns with the `messages` parameter, and the model then generates - the next `Message` in the conversation. Consecutive `user` or `assistant` turns - in your request will be combined into a single turn. - - Each input message must be an object with a `role` and `content`. You can - specify a single `user`-role message, or you can include multiple `user` and - `assistant` messages. - - If the final message uses the `assistant` role, the response content will - continue immediately from the content in that message. This can be used to - constrain part of the model's response. - - Example with a single `user` message: - - ```json - [{ "role": "user", "content": "Hello, Claude" }] - ``` - - Example with multiple conversational turns: - - ```json - [ - { "role": "user", "content": "Hello there." }, - { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, - { "role": "user", "content": "Can you explain LLMs in plain English?" } - ] - ``` - - Example with a partially-filled response from Claude: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Each input message `content` may be either a single `string` or an array of - content blocks, where each block has a specific `type`. Using a `string` for - `content` is shorthand for an array of one content block of type `"text"`. 
The - following input messages are equivalent: - - ```json - { "role": "user", "content": "Hello, Claude" } - ``` - - ```json - { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } - ``` - - Starting with Claude 3 models, you can also send image content blocks: - - ```json - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "/9j/4AAQSkZJRg..." - } - }, - { "type": "text", "text": "What is in this image?" } - ] - } - ``` - - We currently support the `base64` source type for images, and the `image/jpeg`, - `image/png`, `image/gif`, and `image/webp` media types. - - See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for - more input examples. - - Note that if you want to include a - [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use - the top-level `system` parameter — there is no `"system"` role for input - messages in the Messages API. - - model: The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - - stream: Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - - metadata: An object describing metadata about the request. - - stop_sequences: Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - - system: System prompt. - - A system prompt is a way of providing context and instructions to Claude, such - as specifying a particular goal or role. See our - [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). - - temperature: Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - - tool_choice: How the model should use the provided tools. The model can use a specific tool, - any available tool, or decide by itself. - - tools: Definitions of tools that the model may use. - - If you include `tools` in your API request, the model may return `tool_use` - content blocks that represent the model's use of those tools. You can then run - those tools using the tool input generated by the model and then optionally - return results back to the model using `tool_result` content blocks. - - Each tool definition includes: - - - `name`: Name of the tool. - - `description`: Optional, but strongly-recommended description of the tool. - - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` - shape that the model will produce in `tool_use` output content blocks. 
- - For example, if you defined `tools` as: - - ```json - [ - { - "name": "get_stock_price", - "description": "Get the current stock price for a given ticker symbol.", - "input_schema": { - "type": "object", - "properties": { - "ticker": { - "type": "string", - "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." - } - }, - "required": ["ticker"] - } - } - ] - ``` - - And then asked the model "What's the S&P 500 at today?", the model might produce - `tool_use` content blocks in the response like this: - - ```json - [ - { - "type": "tool_use", - "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "name": "get_stock_price", - "input": { "ticker": "^GSPC" } - } - ] - ``` - - You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an - input, and return the following back to the model in a subsequent `user` - message: - - ```json - [ - { - "type": "tool_result", - "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", - "content": "259.75 USD" - } - ] - ``` - - Tools can be used for workflows that include running client-side tools and - functions, or more generally whenever you want the model to produce a particular - JSON structure of output. - - See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. - - top_k: Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - top_p: Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - - betas: Optional header to specify the beta version(s) you want to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) - async def create( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: MetadataParam | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - betas: List[AnthropicBetaParam] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]: - if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: - timeout = 600 - extra_headers = { - **strip_not_given( - { - "anthropic-beta": ",".join(chain((str(e) for e in betas), ["prompt-caching-2024-07-31"])) - if is_given(betas) - else NOT_GIVEN - } - ), - **(extra_headers or {}), - } - extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})} - return await self._post( - "/v1/messages?beta=prompt_caching", - body=await async_maybe_transform( - { - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "metadata": metadata, - "stop_sequences": stop_sequences, - "stream": stream, - "system": system, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_k": top_k, - "top_p": top_p, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PromptCachingBetaMessage, - stream=stream or False, - stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent], - ) - - def stream( - self, - *, - max_tokens: int, - messages: Iterable[PromptCachingBetaMessageParam], - model: ModelParam, - metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, - stop_sequences: List[str] | NotGiven = NOT_GIVEN, - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, - top_k: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPromptCachingBetaMessageStreamManager: - """Create a Message stream""" - if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: - timeout = 600 - - extra_headers = { - "anthropic-beta": "prompt-caching-2024-07-31", - "X-Stainless-Stream-Helper": "beta.prompt_caching.messages", - **(extra_headers or {}), - } - request = self._post( - "/v1/messages?beta=prompt_caching", - body=maybe_transform( - { - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "metadata": metadata, - "stop_sequences": stop_sequences, - "stream": True, - "system": system, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_k": top_k, - "top_p": top_p, - }, - message_create_params.MessageCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PromptCachingBetaMessage, - stream=True, - stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent], - ) - return AsyncPromptCachingBetaMessageStreamManager(request) - - -class MessagesWithRawResponse: - def __init__(self, messages: Messages) -> None: - self._messages = messages - - self.create = _legacy_response.to_raw_response_wrapper( - messages.create, - ) - - -class AsyncMessagesWithRawResponse: - def __init__(self, messages: AsyncMessages) -> None: - self._messages = messages - - self.create = _legacy_response.async_to_raw_response_wrapper( - messages.create, - ) - - -class MessagesWithStreamingResponse: - def __init__(self, messages: Messages) -> None: - self._messages = messages - - self.create = to_streamed_response_wrapper( - messages.create, - ) - - -class AsyncMessagesWithStreamingResponse: - def __init__(self, messages: AsyncMessages) -> None: - self._messages = messages - - self.create = async_to_streamed_response_wrapper( - messages.create, - ) diff --git a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py deleted file mode 100644 index 0154a0d3..00000000 --- a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py +++ /dev/null @@ -1,102 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from .messages import ( - Messages, - AsyncMessages, - MessagesWithRawResponse, - AsyncMessagesWithRawResponse, - MessagesWithStreamingResponse, - AsyncMessagesWithStreamingResponse, -) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource - -__all__ = ["PromptCaching", "AsyncPromptCaching"] - - -class PromptCaching(SyncAPIResource): - @cached_property - def messages(self) -> Messages: - return Messages(self._client) - - @cached_property - def with_raw_response(self) -> PromptCachingWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return the - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers - """ - return PromptCachingWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> PromptCachingWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response - """ - return PromptCachingWithStreamingResponse(self) - - -class AsyncPromptCaching(AsyncAPIResource): - @cached_property - def messages(self) -> AsyncMessages: - return AsyncMessages(self._client) - - @cached_property - def with_raw_response(self) -> AsyncPromptCachingWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return the - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers - """ - return AsyncPromptCachingWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncPromptCachingWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response - """ - return AsyncPromptCachingWithStreamingResponse(self) - - -class PromptCachingWithRawResponse: - def __init__(self, prompt_caching: PromptCaching) -> None: - self._prompt_caching = prompt_caching - - @cached_property - def messages(self) -> MessagesWithRawResponse: - return MessagesWithRawResponse(self._prompt_caching.messages) - - -class AsyncPromptCachingWithRawResponse: - def __init__(self, prompt_caching: AsyncPromptCaching) -> None: - self._prompt_caching = prompt_caching - - @cached_property - def messages(self) -> AsyncMessagesWithRawResponse: - return AsyncMessagesWithRawResponse(self._prompt_caching.messages) - - -class PromptCachingWithStreamingResponse: - def __init__(self, prompt_caching: PromptCaching) -> None: - self._prompt_caching = prompt_caching - - @cached_property - def messages(self) -> MessagesWithStreamingResponse: - return MessagesWithStreamingResponse(self._prompt_caching.messages) - - -class AsyncPromptCachingWithStreamingResponse: - def __init__(self, prompt_caching: AsyncPromptCaching) -> None: - self._prompt_caching = prompt_caching - - @cached_property - def messages(self) -> AsyncMessagesWithStreamingResponse: - return AsyncMessagesWithStreamingResponse(self._prompt_caching.messages) diff --git a/src/anthropic/resources/beta/prompt_caching/__init__.py b/src/anthropic/resources/messages/__init__.py similarity index 52% rename from src/anthropic/resources/beta/prompt_caching/__init__.py rename to src/anthropic/resources/messages/__init__.py index ccf0b0a8..6e7cf9d9 100644 --- a/src/anthropic/resources/beta/prompt_caching/__init__.py +++ b/src/anthropic/resources/messages/__init__.py @@ -1,6 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) from .messages import ( + DEPRECATED_MODELS, Messages, AsyncMessages, MessagesWithRawResponse, @@ -8,26 +17,19 @@ MessagesWithStreamingResponse, AsyncMessagesWithStreamingResponse, ) -from .prompt_caching import ( - PromptCaching, - AsyncPromptCaching, - PromptCachingWithRawResponse, - AsyncPromptCachingWithRawResponse, - PromptCachingWithStreamingResponse, - AsyncPromptCachingWithStreamingResponse, -) __all__ = [ + "Batches", + "AsyncBatches", + "BatchesWithRawResponse", + "AsyncBatchesWithRawResponse", + "BatchesWithStreamingResponse", + "AsyncBatchesWithStreamingResponse", "Messages", "AsyncMessages", "MessagesWithRawResponse", "AsyncMessagesWithRawResponse", "MessagesWithStreamingResponse", "AsyncMessagesWithStreamingResponse", - "PromptCaching", - "AsyncPromptCaching", - "PromptCachingWithRawResponse", - "AsyncPromptCachingWithRawResponse", - "PromptCachingWithStreamingResponse", - "AsyncPromptCachingWithStreamingResponse", + "DEPRECATED_MODELS", ] diff --git a/src/anthropic/resources/messages/batches.py b/src/anthropic/resources/messages/batches.py new file mode 100644 index 00000000..7124e954 --- /dev/null +++ b/src/anthropic/resources/messages/batches.py @@ -0,0 +1,618 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + BinaryAPIResponse, + AsyncBinaryAPIResponse, + StreamedBinaryAPIResponse, + AsyncStreamedBinaryAPIResponse, + to_streamed_response_wrapper, + to_custom_raw_response_wrapper, + async_to_streamed_response_wrapper, + to_custom_streamed_response_wrapper, + async_to_custom_raw_response_wrapper, + async_to_custom_streamed_response_wrapper, +) +from ...pagination import SyncPage, AsyncPage +from ..._exceptions import AnthropicError +from ..._base_client import AsyncPaginator, make_request_options +from ...types.messages import MessageBatchIndividualResponse, batch_list_params, batch_create_params +from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder +from ...types.messages.message_batch import MessageBatch + +__all__ = ["Batches", "AsyncBatches"] + + +class Batches(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return BatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return BatchesWithStreamingResponse(self) + + def create( + self, + *, + requests: Iterable[batch_create_params.Request], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """ + Send a batch of Message creation requests. + + The Message Batches API can be used to process multiple Messages API requests at + once. Once a Message Batch is created, it begins processing immediately. Batches + can take up to 24 hours to complete. + + Args: + requests: List of requests for prompt completion. Each is an individual request to create + a Message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/v1/messages/batches", + body=maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + def retrieve( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """This endpoint is idempotent and can be used to poll for Message Batch + completion. + + To access the results of a Message Batch, make a request to the + `results_url` field in the response. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + return self._get( + f"/v1/messages/batches/{message_batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[MessageBatch]: + """List all Message Batches within a Workspace. 
+ + Most recently created batches are + returned first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. + + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/messages/batches", + page=SyncPage[MessageBatch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=MessageBatch, + ) + + def cancel( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """Batches may be canceled any time before processing ends. + + Once cancellation is + initiated, the batch enters a `canceling` state, at which time the system may + complete any in-progress, non-interruptible requests before finalizing + cancellation. + + The number of canceled requests is specified in `request_counts`. To determine + which requests were canceled, check the individual results within the batch. + Note that cancellation may not result in any canceled requests if they were + non-interruptible. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + return self._post( + f"/v1/messages/batches/{message_batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + def results( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JSONLDecoder[MessageBatchIndividualResponse]: + """ + Streams the results of a Message Batch as a `.jsonl` file. + + Each line in the file is a JSON object containing the result of a single request + in the Message Batch. Results are not guaranteed to be in the same order as + requests. 
Use the `custom_id` field to match results to requests. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + + batch = self.retrieve(message_batch_id=message_batch_id) + if not batch.results_url: + raise AnthropicError( + f"No `results_url` for the given batch; Has it finished processing? {batch.processing_status}" + ) + + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + batch.results_url, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + stream=True, + cast_to=JSONLDecoder[MessageBatchIndividualResponse], + ) + + +class AsyncBatches(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return AsyncBatchesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return AsyncBatchesWithStreamingResponse(self) + + async def create( + self, + *, + requests: Iterable[batch_create_params.Request], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """ + Send a batch of Message creation requests. + + The Message Batches API can be used to process multiple Messages API requests at + once. Once a Message Batch is created, it begins processing immediately. Batches + can take up to 24 hours to complete. + + Args: + requests: List of requests for prompt completion. Each is an individual request to create + a Message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/v1/messages/batches", + body=await async_maybe_transform({"requests": requests}, batch_create_params.BatchCreateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + async def retrieve( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """This endpoint is idempotent and can be used to poll for Message Batch + completion. + + To access the results of a Message Batch, make a request to the + `results_url` field in the response. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + return await self._get( + f"/v1/messages/batches/{message_batch_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[MessageBatch, AsyncPage[MessageBatch]]: + """List all Message Batches within a Workspace. + + Most recently created batches are + returned first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. + + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/messages/batches", + page=AsyncPage[MessageBatch], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=MessageBatch, + ) + + async def cancel( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageBatch: + """Batches may be canceled any time before processing ends. 
+ + Once cancellation is + initiated, the batch enters a `canceling` state, at which time the system may + complete any in-progress, non-interruptible requests before finalizing + cancellation. + + The number of canceled requests is specified in `request_counts`. To determine + which requests were canceled, check the individual results within the batch. + Note that cancellation may not result in any canceled requests if they were + non-interruptible. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + return await self._post( + f"/v1/messages/batches/{message_batch_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageBatch, + ) + + async def results( + self, + message_batch_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]: + """ + Streams the results of a Message Batch as a `.jsonl` file. + + Each line in the file is a JSON object containing the result of a single request + in the Message Batch. Results are not guaranteed to be in the same order as + requests. Use the `custom_id` field to match results to requests. + + Args: + message_batch_id: ID of the Message Batch. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not message_batch_id: + raise ValueError(f"Expected a non-empty value for `message_batch_id` but received {message_batch_id!r}") + + batch = await self.retrieve(message_batch_id=message_batch_id) + if not batch.results_url: + raise AnthropicError( + f"No `results_url` for the given batch; Has it finished processing? 
{batch.processing_status}" + ) + + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return await self._get( + batch.results_url, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + stream=True, + cast_to=AsyncJSONLDecoder[MessageBatchIndividualResponse], + ) + + +class BatchesWithRawResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = _legacy_response.to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + batches.cancel, + ) + self.results = to_custom_raw_response_wrapper( + batches.results, + BinaryAPIResponse, + ) + + +class AsyncBatchesWithRawResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = _legacy_response.async_to_raw_response_wrapper( + batches.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + batches.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + batches.list, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + batches.cancel, + ) + self.results = async_to_custom_raw_response_wrapper( + batches.results, + AsyncBinaryAPIResponse, + ) + + +class BatchesWithStreamingResponse: + def __init__(self, batches: Batches) -> None: + self._batches = batches + + self.create = to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = to_streamed_response_wrapper( + batches.list, + ) + self.cancel = to_streamed_response_wrapper( + batches.cancel, + ) + self.results = to_custom_streamed_response_wrapper( + batches.results, + StreamedBinaryAPIResponse, + ) + + +class AsyncBatchesWithStreamingResponse: + def __init__(self, batches: AsyncBatches) -> None: + self._batches = batches + + self.create = async_to_streamed_response_wrapper( + batches.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + batches.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + batches.list, + ) + self.cancel = async_to_streamed_response_wrapper( + batches.cancel, + ) + self.results = async_to_custom_streamed_response_wrapper( + batches.results, + AsyncStreamedBinaryAPIResponse, + ) diff --git a/src/anthropic/resources/messages.py b/src/anthropic/resources/messages/messages.py similarity index 80% rename from src/anthropic/resources/messages.py rename to src/anthropic/resources/messages/messages.py index b5230807..88cc8605 100644 --- a/src/anthropic/resources/messages.py +++ b/src/anthropic/resources/messages/messages.py @@ -9,30 +9,39 @@ import httpx -from .. import _legacy_response -from ..types import message_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( +from ... 
import _legacy_response +from ...types import message_create_params, message_count_tokens_params +from .batches import ( + Batches, + AsyncBatches, + BatchesWithRawResponse, + AsyncBatchesWithRawResponse, + BatchesWithStreamingResponse, + AsyncBatchesWithStreamingResponse, +) +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( is_given, required_args, maybe_transform, async_maybe_transform, ) -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._constants import DEFAULT_TIMEOUT -from .._streaming import Stream, AsyncStream -from .._base_client import make_request_options -from ..lib.streaming import MessageStreamManager, AsyncMessageStreamManager -from ..types.message import Message -from ..types.tool_param import ToolParam -from ..types.model_param import ModelParam -from ..types.message_param import MessageParam -from ..types.metadata_param import MetadataParam -from ..types.text_block_param import TextBlockParam -from ..types.tool_choice_param import ToolChoiceParam -from ..types.raw_message_stream_event import RawMessageStreamEvent +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._constants import DEFAULT_TIMEOUT +from ..._streaming import Stream, AsyncStream +from ..._base_client import make_request_options +from ...lib.streaming import MessageStreamManager, AsyncMessageStreamManager +from ...types.message import Message +from ...types.tool_param import ToolParam +from ...types.model_param import ModelParam +from ...types.message_param import MessageParam +from ...types.metadata_param import MetadataParam +from ...types.text_block_param import TextBlockParam +from ...types.tool_choice_param import ToolChoiceParam +from ...types.message_tokens_count import MessageTokensCount +from ...types.raw_message_stream_event import RawMessageStreamEvent __all__ = ["Messages", "AsyncMessages"] @@ -47,6 +56,10 @@ class Messages(SyncAPIResource): + @cached_property + def batches(self) -> Batches: + return Batches(self._client) + @cached_property def with_raw_response(self) -> MessagesWithRawResponse: """ @@ -974,8 +987,229 @@ def stream( ) return MessageStreamManager(make_request) + def count_tokens( + self, + *, + messages: Iterable[MessageParam], + model: ModelParam, + system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN, + tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageTokensCount: + """ + Count the number of tokens in a Message. + + The Token Count API can be used to count the number of tokens in a Message, + including tools, images, and documents, without creating it. + + Args: + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. 
When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. Consecutive `user` or `assistant` turns + in your request will be combined into a single turn. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. 
+ - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/v1/messages/count_tokens", + body=maybe_transform( + { + "messages": messages, + "model": model, + "system": system, + "tool_choice": tool_choice, + "tools": tools, + }, + message_count_tokens_params.MessageCountTokensParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageTokensCount, + ) + class AsyncMessages(AsyncAPIResource): + @cached_property + def batches(self) -> AsyncBatches: + return AsyncBatches(self._client) + @cached_property def with_raw_response(self) -> AsyncMessagesWithRawResponse: """ @@ -1902,6 +2136,223 @@ def stream( ) return AsyncMessageStreamManager(request) + async def count_tokens( + self, + *, + messages: Iterable[MessageParam], + model: ModelParam, + system: Union[str, Iterable[TextBlockParam]] | NotGiven = NOT_GIVEN, + tool_choice: ToolChoiceParam | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> MessageTokensCount: + """ + Count the number of tokens in a Message. + + The Token Count API can be used to count the number of tokens in a Message, + including tools, images, and documents, without creating it. + + Args: + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. 
When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. Consecutive `user` or `assistant` turns + in your request will be combined into a single turn. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. 
+ - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/v1/messages/count_tokens", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "system": system, + "tool_choice": tool_choice, + "tools": tools, + }, + message_count_tokens_params.MessageCountTokensParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=MessageTokensCount, + ) + class MessagesWithRawResponse: def __init__(self, messages: Messages) -> None: @@ -1910,6 +2361,13 @@ def __init__(self, messages: Messages) -> None: self.create = _legacy_response.to_raw_response_wrapper( messages.create, ) + self.count_tokens = _legacy_response.to_raw_response_wrapper( + messages.count_tokens, + ) + + @cached_property + def batches(self) -> BatchesWithRawResponse: + return BatchesWithRawResponse(self._messages.batches) class AsyncMessagesWithRawResponse: @@ -1919,6 +2377,13 @@ def __init__(self, messages: AsyncMessages) -> None: self.create = _legacy_response.async_to_raw_response_wrapper( messages.create, ) + self.count_tokens = _legacy_response.async_to_raw_response_wrapper( + messages.count_tokens, + ) + + @cached_property + def batches(self) -> AsyncBatchesWithRawResponse: + return AsyncBatchesWithRawResponse(self._messages.batches) class MessagesWithStreamingResponse: @@ -1928,6 +2393,13 @@ def __init__(self, messages: Messages) -> None: self.create = to_streamed_response_wrapper( messages.create, ) + self.count_tokens = to_streamed_response_wrapper( + messages.count_tokens, + ) + + @cached_property + def batches(self) -> BatchesWithStreamingResponse: + return BatchesWithStreamingResponse(self._messages.batches) class AsyncMessagesWithStreamingResponse: @@ -1937,3 +2409,10 @@ def __init__(self, messages: AsyncMessages) 
-> None: self.create = async_to_streamed_response_wrapper( messages.create, ) + self.count_tokens = async_to_streamed_response_wrapper( + messages.count_tokens, + ) + + @cached_property + def batches(self) -> AsyncBatchesWithStreamingResponse: + return AsyncBatchesWithStreamingResponse(self._messages.batches) diff --git a/src/anthropic/resources/models.py b/src/anthropic/resources/models.py new file mode 100644 index 00000000..aec102bf --- /dev/null +++ b/src/anthropic/resources/models.py @@ -0,0 +1,300 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .. import _legacy_response +from ..types import model_list_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..pagination import SyncPage, AsyncPage +from .._base_client import AsyncPaginator, make_request_options +from ..types.model_info import ModelInfo + +__all__ = ["Models", "AsyncModels"] + + +class Models(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return ModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return ModelsWithStreamingResponse(self) + + def retrieve( + self, + model_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelInfo: + """ + Get a specific model. + + The Models API response can be used to determine information about a specific + model or resolve a model alias to a model ID. + + Args: + model_id: Model identifier or alias. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model_id: + raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") + return self._get( + f"/v1/models/{model_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelInfo, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[ModelInfo]: + """ + List available models. + + The Models API response can be used to determine which models are available for + use in the API. More recently released models are listed first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. + + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/models", + page=SyncPage[ModelInfo], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + model_list_params.ModelListParams, + ), + ), + model=ModelInfo, + ) + + +class AsyncModels(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#accessing-raw-response-data-eg-headers + """ + return AsyncModelsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/anthropics/anthropic-sdk-python#with_streaming_response + """ + return AsyncModelsWithStreamingResponse(self) + + async def retrieve( + self, + model_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelInfo: + """ + Get a specific model. + + The Models API response can be used to determine information about a specific + model or resolve a model alias to a model ID. + + Args: + model_id: Model identifier or alias. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not model_id: + raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") + return await self._get( + f"/v1/models/{model_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelInfo, + ) + + def list( + self, + *, + after_id: str | NotGiven = NOT_GIVEN, + before_id: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ModelInfo, AsyncPage[ModelInfo]]: + """ + List available models. + + The Models API response can be used to determine which models are available for + use in the API. More recently released models are listed first. + + Args: + after_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately after this object. + + before_id: ID of the object to use as a cursor for pagination. When provided, returns the + page of results immediately before this object. + + limit: Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/v1/models", + page=AsyncPage[ModelInfo], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after_id": after_id, + "before_id": before_id, + "limit": limit, + }, + model_list_params.ModelListParams, + ), + ), + model=ModelInfo, + ) + + +class ModelsWithRawResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = _legacy_response.to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.to_raw_response_wrapper( + models.list, + ) + + +class AsyncModelsWithRawResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + models.retrieve, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + models.list, + ) + + +class ModelsWithStreamingResponse: + def __init__(self, models: Models) -> None: + self._models = models + + self.retrieve = to_streamed_response_wrapper( + models.retrieve, + ) + self.list = to_streamed_response_wrapper( + models.list, + ) + + +class AsyncModelsWithStreamingResponse: + def __init__(self, models: AsyncModels) -> None: + self._models = models + + self.retrieve = async_to_streamed_response_wrapper( + models.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + models.list, + ) diff --git a/src/anthropic/types/__init__.py b/src/anthropic/types/__init__.py index 0125a215..a880b827 
100644 --- a/src/anthropic/types/__init__.py +++ b/src/anthropic/types/__init__.py @@ -4,9 +4,23 @@ from .model import Model as Model from .usage import Usage as Usage +from .shared import ( + ErrorObject as ErrorObject, + BillingError as BillingError, + ErrorResponse as ErrorResponse, + NotFoundError as NotFoundError, + APIErrorObject as APIErrorObject, + RateLimitError as RateLimitError, + OverloadedError as OverloadedError, + PermissionError as PermissionError, + AuthenticationError as AuthenticationError, + GatewayTimeoutError as GatewayTimeoutError, + InvalidRequestError as InvalidRequestError, +) from .message import Message as Message from .beta_error import BetaError as BetaError from .completion import Completion as Completion +from .model_info import ModelInfo as ModelInfo from .text_block import TextBlock as TextBlock from .text_delta import TextDelta as TextDelta from .tool_param import ToolParam as ToolParam @@ -19,7 +33,9 @@ from .input_json_delta import InputJSONDelta as InputJSONDelta from .text_block_param import TextBlockParam as TextBlockParam from .image_block_param import ImageBlockParam as ImageBlockParam +from .model_list_params import ModelListParams as ModelListParams from .tool_choice_param import ToolChoiceParam as ToolChoiceParam +from .beta_billing_error import BetaBillingError as BetaBillingError from .message_stop_event import MessageStopEvent as MessageStopEvent from .beta_error_response import BetaErrorResponse as BetaErrorResponse from .content_block_param import ContentBlockParam as ContentBlockParam @@ -28,7 +44,9 @@ from .message_start_event import MessageStartEvent as MessageStartEvent from .anthropic_beta_param import AnthropicBetaParam as AnthropicBetaParam from .beta_not_found_error import BetaNotFoundError as BetaNotFoundError +from .document_block_param import DocumentBlockParam as DocumentBlockParam from .message_stream_event import MessageStreamEvent as MessageStreamEvent +from .message_tokens_count import MessageTokensCount as MessageTokensCount from .tool_use_block_param import ToolUseBlockParam as ToolUseBlockParam from .beta_overloaded_error import BetaOverloadedError as BetaOverloadedError from .beta_permission_error import BetaPermissionError as BetaPermissionError @@ -38,6 +56,7 @@ from .raw_message_stop_event import RawMessageStopEvent as RawMessageStopEvent from .tool_choice_auto_param import ToolChoiceAutoParam as ToolChoiceAutoParam from .tool_choice_tool_param import ToolChoiceToolParam as ToolChoiceToolParam +from .base64_pdf_source_param import Base64PDFSourceParam as Base64PDFSourceParam from .raw_message_delta_event import RawMessageDeltaEvent as RawMessageDeltaEvent from .raw_message_start_event import RawMessageStartEvent as RawMessageStartEvent from .tool_result_block_param import ToolResultBlockParam as ToolResultBlockParam @@ -47,7 +66,10 @@ from .beta_authentication_error import BetaAuthenticationError as BetaAuthenticationError from .content_block_delta_event import ContentBlockDeltaEvent as ContentBlockDeltaEvent from .content_block_start_event import ContentBlockStartEvent as ContentBlockStartEvent +from .beta_gateway_timeout_error import BetaGatewayTimeoutError as BetaGatewayTimeoutError from .beta_invalid_request_error import BetaInvalidRequestError as BetaInvalidRequestError +from .message_count_tokens_params import MessageCountTokensParams as MessageCountTokensParams from .raw_content_block_stop_event import RawContentBlockStopEvent as RawContentBlockStopEvent +from .cache_control_ephemeral_param import 
CacheControlEphemeralParam as CacheControlEphemeralParam from .raw_content_block_delta_event import RawContentBlockDeltaEvent as RawContentBlockDeltaEvent from .raw_content_block_start_event import RawContentBlockStartEvent as RawContentBlockStartEvent diff --git a/src/anthropic/types/base64_pdf_source_param.py b/src/anthropic/types/base64_pdf_source_param.py new file mode 100644 index 00000000..ac247a19 --- /dev/null +++ b/src/anthropic/types/base64_pdf_source_param.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, Annotated, TypedDict + +from .._types import Base64FileInput +from .._utils import PropertyInfo +from .._models import set_pydantic_config + +__all__ = ["Base64PDFSourceParam"] + + +class Base64PDFSourceParam(TypedDict, total=False): + data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]] + + media_type: Required[Literal["application/pdf"]] + + type: Required[Literal["base64"]] + + +set_pydantic_config(Base64PDFSourceParam, {"arbitrary_types_allowed": True}) diff --git a/src/anthropic/types/beta/__init__.py b/src/anthropic/types/beta/__init__.py index cf5fd496..c233d9c7 100644 --- a/src/anthropic/types/beta/__init__.py +++ b/src/anthropic/types/beta/__init__.py @@ -4,9 +4,11 @@ from .beta_usage import BetaUsage as BetaUsage from .beta_message import BetaMessage as BetaMessage +from .beta_model_info import BetaModelInfo as BetaModelInfo from .beta_text_block import BetaTextBlock as BetaTextBlock from .beta_text_delta import BetaTextDelta as BetaTextDelta from .beta_tool_param import BetaToolParam as BetaToolParam +from .model_list_params import ModelListParams as ModelListParams from .beta_content_block import BetaContentBlock as BetaContentBlock from .beta_message_param import BetaMessageParam as BetaMessageParam from .beta_metadata_param import BetaMetadataParam as BetaMetadataParam diff --git a/src/anthropic/types/beta/beta_model_info.py b/src/anthropic/types/beta/beta_model_info.py new file mode 100644 index 00000000..6ea50d9f --- /dev/null +++ b/src/anthropic/types/beta/beta_model_info.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from datetime import datetime +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BetaModelInfo"] + + +class BetaModelInfo(BaseModel): + id: str + """Unique model identifier.""" + + created_at: datetime + """RFC 3339 datetime string representing the time at which the model was released. + + May be set to an epoch value if the release date is unknown. + """ + + display_name: str + """A human-readable name for the model.""" + + type: Literal["model"] + """Object type. + + For Models, this is always `"model"`. + """ diff --git a/src/anthropic/types/beta/beta_raw_content_block_delta_event.py b/src/anthropic/types/beta/beta_raw_content_block_delta_event.py index 9e26688c..03ce6557 100644 --- a/src/anthropic/types/beta/beta_raw_content_block_delta_event.py +++ b/src/anthropic/types/beta/beta_raw_content_block_delta_event.py @@ -1,16 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Literal, Annotated, TypeAlias +from typing_extensions import Literal, TypeAlias -from ..._utils import PropertyInfo from ..._models import BaseModel from .beta_text_delta import BetaTextDelta from .beta_input_json_delta import BetaInputJSONDelta __all__ = ["BetaRawContentBlockDeltaEvent", "Delta"] -Delta: TypeAlias = Annotated[Union[BetaTextDelta, BetaInputJSONDelta], PropertyInfo(discriminator="type")] +Delta: TypeAlias = Union[BetaTextDelta, BetaInputJSONDelta] class BetaRawContentBlockDeltaEvent(BaseModel): diff --git a/src/anthropic/types/beta/messages/batch_list_params.py b/src/anthropic/types/beta/messages/batch_list_params.py index b75cd931..3f406251 100644 --- a/src/anthropic/types/beta/messages/batch_list_params.py +++ b/src/anthropic/types/beta/messages/batch_list_params.py @@ -27,7 +27,7 @@ class BatchListParams(TypedDict, total=False): limit: int """Number of items to return per page. - Defaults to `20`. Ranges from `1` to `100`. + Defaults to `20`. Ranges from `1` to `1000`. """ betas: Annotated[List[AnthropicBetaParam], PropertyInfo(alias="anthropic-beta")] diff --git a/src/anthropic/types/beta/model_list_params.py b/src/anthropic/types/beta/model_list_params.py new file mode 100644 index 00000000..b16d22a3 --- /dev/null +++ b/src/anthropic/types/beta/model_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ModelListParams"] + + +class ModelListParams(TypedDict, total=False): + after_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately after this object. + """ + + before_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately before this object. + """ + + limit: int + """Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + """ diff --git a/src/anthropic/types/beta/prompt_caching/__init__.py b/src/anthropic/types/beta/prompt_caching/__init__.py deleted file mode 100644 index 3b4004fc..00000000 --- a/src/anthropic/types/beta/prompt_caching/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from .message_create_params import MessageCreateParams as MessageCreateParams -from .prompt_caching_beta_usage import PromptCachingBetaUsage as PromptCachingBetaUsage -from .prompt_caching_beta_message import PromptCachingBetaMessage as PromptCachingBetaMessage -from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam as PromptCachingBetaToolParam -from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam as PromptCachingBetaMessageParam -from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam as PromptCachingBetaTextBlockParam -from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam as PromptCachingBetaImageBlockParam -from .prompt_caching_beta_tool_use_block_param import ( - PromptCachingBetaToolUseBlockParam as PromptCachingBetaToolUseBlockParam, -) -from .prompt_caching_beta_tool_result_block_param import ( - PromptCachingBetaToolResultBlockParam as PromptCachingBetaToolResultBlockParam, -) -from .raw_prompt_caching_beta_message_start_event import ( - RawPromptCachingBetaMessageStartEvent as RawPromptCachingBetaMessageStartEvent, -) -from .raw_prompt_caching_beta_message_stream_event import ( - RawPromptCachingBetaMessageStreamEvent as RawPromptCachingBetaMessageStreamEvent, -) -from .prompt_caching_beta_cache_control_ephemeral_param import ( - PromptCachingBetaCacheControlEphemeralParam as PromptCachingBetaCacheControlEphemeralParam, -) diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py deleted file mode 100644 index 02dfb0bc..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py +++ /dev/null @@ -1,32 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union, Optional -from typing_extensions import Literal, Required, Annotated, TypedDict - -from ...._types import Base64FileInput -from ...._utils import PropertyInfo -from ...._models import set_pydantic_config -from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam - -__all__ = ["PromptCachingBetaImageBlockParam", "Source"] - - -class Source(TypedDict, total=False): - data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]] - - media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] - - type: Required[Literal["base64"]] - - -set_pydantic_config(Source, {"arbitrary_types_allowed": True}) - - -class PromptCachingBetaImageBlockParam(TypedDict, total=False): - source: Required[Source] - - type: Required[Literal["image"]] - - cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py deleted file mode 100644 index 2cc49a2c..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py +++ /dev/null @@ -1,109 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import List, Optional -from typing_extensions import Literal - -from ...model import Model -from ...._models import BaseModel -from ...content_block import ContentBlock -from .prompt_caching_beta_usage import PromptCachingBetaUsage - -__all__ = ["PromptCachingBetaMessage"] - - -class PromptCachingBetaMessage(BaseModel): - id: str - """Unique object identifier. - - The format and length of IDs may change over time. - """ - - content: List[ContentBlock] - """Content generated by the model. - - This is an array of content blocks, each of which has a `type` that determines - its shape. - - Example: - - ```json - [{ "type": "text", "text": "Hi, I'm Claude." }] - ``` - - If the request input `messages` ended with an `assistant` turn, then the - response `content` will continue directly from that last turn. You can use this - to constrain the model's output. - - For example, if the input `messages` were: - - ```json - [ - { - "role": "user", - "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" - }, - { "role": "assistant", "content": "The best answer is (" } - ] - ``` - - Then the response `content` might be: - - ```json - [{ "type": "text", "text": "B)" }] - ``` - """ - - model: Model - """ - The model that will complete your prompt.\n\nSee - [models](https://docs.anthropic.com/en/docs/models-overview) for additional - details and options. - """ - - role: Literal["assistant"] - """Conversational role of the generated message. - - This will always be `"assistant"`. - """ - - stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None - """The reason that we stopped. - - This may be one the following values: - - - `"end_turn"`: the model reached a natural stopping point - - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum - - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated - - `"tool_use"`: the model invoked one or more tools - - In non-streaming mode this value is always non-null. In streaming mode, it is - null in the `message_start` event and non-null otherwise. - """ - - stop_sequence: Optional[str] = None - """Which custom stop sequence was generated, if any. - - This value will be a non-null string if one of your custom stop sequences was - generated. - """ - - type: Literal["message"] - """Object type. - - For Messages, this is always `"message"`. - """ - - usage: PromptCachingBetaUsage - """Billing and rate-limit usage. - - Anthropic's API bills and rate-limits by token counts, as tokens represent the - underlying cost to our systems. - - Under the hood, the API transforms requests into a format suitable for the - model. The model's output then goes through a parsing stage before becoming an - API response. As a result, the token counts in `usage` will not match one-to-one - with the exact visible content of an API request or response. - - For example, `output_tokens` will be non-zero, even for an empty string response - from Claude. - """ diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py deleted file mode 100644 index f88093e2..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from ...content_block import ContentBlock -from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam -from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam -from .prompt_caching_beta_tool_use_block_param import PromptCachingBetaToolUseBlockParam -from .prompt_caching_beta_tool_result_block_param import PromptCachingBetaToolResultBlockParam - -__all__ = ["PromptCachingBetaMessageParam"] - - -class PromptCachingBetaMessageParam(TypedDict, total=False): - content: Required[ - Union[ - str, - Iterable[ - Union[ - PromptCachingBetaTextBlockParam, - PromptCachingBetaImageBlockParam, - PromptCachingBetaToolUseBlockParam, - PromptCachingBetaToolResultBlockParam, - ContentBlock, - ] - ], - ] - ] - - role: Required[Literal["user", "assistant"]] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py deleted file mode 100644 index cbb463d2..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Optional -from typing_extensions import Literal, Required, TypedDict - -from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam - -__all__ = ["PromptCachingBetaTextBlockParam"] - - -class PromptCachingBetaTextBlockParam(TypedDict, total=False): - text: Required[str] - - type: Required[Literal["text"]] - - cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py deleted file mode 100644 index cfd9f8aa..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py +++ /dev/null @@ -1,45 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam - -__all__ = ["PromptCachingBetaToolParam", "InputSchema"] - - -class InputSchemaTyped(TypedDict, total=False): - type: Required[Literal["object"]] - - properties: Optional[object] - - -InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]] - - -class PromptCachingBetaToolParam(TypedDict, total=False): - input_schema: Required[InputSchema] - """[JSON schema](https://json-schema.org/) for this tool's input. - - This defines the shape of the `input` that your tool accepts and that the model - will produce. - """ - - name: Required[str] - """Name of the tool. - - This is how the tool will be called by the model and in tool_use blocks. - """ - - cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] - - description: str - """Description of what this tool does. - - Tool descriptions should be as detailed as possible. The more information that - the model has about what the tool is and how to use it, the better it will - perform. 
You can use natural language descriptions to reinforce important - aspects of the tool input JSON schema. - """ diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py deleted file mode 100644 index 6c1ca718..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union, Iterable, Optional -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam -from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam -from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam - -__all__ = ["PromptCachingBetaToolResultBlockParam", "Content"] - -Content: TypeAlias = Union[PromptCachingBetaTextBlockParam, PromptCachingBetaImageBlockParam] - - -class PromptCachingBetaToolResultBlockParam(TypedDict, total=False): - tool_use_id: Required[str] - - type: Required[Literal["tool_result"]] - - cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] - - content: Union[str, Iterable[Content]] - - is_error: bool diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py deleted file mode 100644 index 35ccf446..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Optional -from typing_extensions import Literal, Required, TypedDict - -from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam - -__all__ = ["PromptCachingBetaToolUseBlockParam"] - - -class PromptCachingBetaToolUseBlockParam(TypedDict, total=False): - id: Required[str] - - input: Required[object] - - name: Required[str] - - type: Required[Literal["tool_use"]] - - cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py deleted file mode 100644 index 20d23004..00000000 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Optional - -from ...._models import BaseModel - -__all__ = ["PromptCachingBetaUsage"] - - -class PromptCachingBetaUsage(BaseModel): - cache_creation_input_tokens: Optional[int] = None - """The number of input tokens used to create the cache entry.""" - - cache_read_input_tokens: Optional[int] = None - """The number of input tokens read from the cache.""" - - input_tokens: int - """The number of input tokens which were used.""" - - output_tokens: int - """The number of output tokens which were used.""" diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py deleted file mode 100644 index 9d055e22..00000000 --- a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ...._models import BaseModel -from .prompt_caching_beta_message import PromptCachingBetaMessage - -__all__ = ["RawPromptCachingBetaMessageStartEvent"] - - -class RawPromptCachingBetaMessageStartEvent(BaseModel): - message: PromptCachingBetaMessage - - type: Literal["message_start"] diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py deleted file mode 100644 index 58099baf..00000000 --- a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Annotated, TypeAlias - -from ...._utils import PropertyInfo -from ...raw_message_stop_event import RawMessageStopEvent -from ...raw_message_delta_event import RawMessageDeltaEvent -from ...raw_content_block_stop_event import RawContentBlockStopEvent -from ...raw_content_block_delta_event import RawContentBlockDeltaEvent -from ...raw_content_block_start_event import RawContentBlockStartEvent -from .raw_prompt_caching_beta_message_start_event import RawPromptCachingBetaMessageStartEvent - -__all__ = ["RawPromptCachingBetaMessageStreamEvent"] - -RawPromptCachingBetaMessageStreamEvent: TypeAlias = Annotated[ - Union[ - RawPromptCachingBetaMessageStartEvent, - RawMessageDeltaEvent, - RawMessageStopEvent, - RawContentBlockStartEvent, - RawContentBlockDeltaEvent, - RawContentBlockStopEvent, - ], - PropertyInfo(discriminator="type"), -] diff --git a/src/anthropic/types/beta_billing_error.py b/src/anthropic/types/beta_billing_error.py new file mode 100644 index 00000000..1ab37614 --- /dev/null +++ b/src/anthropic/types/beta_billing_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["BetaBillingError"] + + +class BetaBillingError(BaseModel): + message: str + + type: Literal["billing_error"] diff --git a/src/anthropic/types/beta_error.py b/src/anthropic/types/beta_error.py index 4d870ff4..029d80dc 100644 --- a/src/anthropic/types/beta_error.py +++ b/src/anthropic/types/beta_error.py @@ -5,11 +5,13 @@ from .._utils import PropertyInfo from .beta_api_error import BetaAPIError +from .beta_billing_error import BetaBillingError from .beta_not_found_error import BetaNotFoundError from .beta_overloaded_error import BetaOverloadedError from .beta_permission_error import BetaPermissionError from .beta_rate_limit_error import BetaRateLimitError from .beta_authentication_error import BetaAuthenticationError +from .beta_gateway_timeout_error import BetaGatewayTimeoutError from .beta_invalid_request_error import BetaInvalidRequestError __all__ = ["BetaError"] @@ -18,9 +20,11 @@ Union[ BetaInvalidRequestError, BetaAuthenticationError, + BetaBillingError, BetaPermissionError, BetaNotFoundError, BetaRateLimitError, + BetaGatewayTimeoutError, BetaAPIError, BetaOverloadedError, ], diff --git a/src/anthropic/types/beta_gateway_timeout_error.py b/src/anthropic/types/beta_gateway_timeout_error.py new file mode 100644 index 00000000..9a29705b --- /dev/null +++ b/src/anthropic/types/beta_gateway_timeout_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["BetaGatewayTimeoutError"] + + +class BetaGatewayTimeoutError(BaseModel): + message: str + + type: Literal["timeout_error"] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py b/src/anthropic/types/cache_control_ephemeral_param.py similarity index 62% rename from src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py rename to src/anthropic/types/cache_control_ephemeral_param.py index 8370b938..8900071e 100644 --- a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py +++ b/src/anthropic/types/cache_control_ephemeral_param.py @@ -4,8 +4,8 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["PromptCachingBetaCacheControlEphemeralParam"] +__all__ = ["CacheControlEphemeralParam"] -class PromptCachingBetaCacheControlEphemeralParam(TypedDict, total=False): +class CacheControlEphemeralParam(TypedDict, total=False): type: Required[Literal["ephemeral"]] diff --git a/src/anthropic/types/content_block_param.py b/src/anthropic/types/content_block_param.py index 65e9bd4a..836a5e19 100644 --- a/src/anthropic/types/content_block_param.py +++ b/src/anthropic/types/content_block_param.py @@ -7,9 +7,12 @@ from .text_block_param import TextBlockParam from .image_block_param import ImageBlockParam +from .document_block_param import DocumentBlockParam from .tool_use_block_param import ToolUseBlockParam from .tool_result_block_param import ToolResultBlockParam __all__ = ["ContentBlockParam"] -ContentBlockParam: TypeAlias = Union[TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam] +ContentBlockParam: TypeAlias = Union[ + TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam, DocumentBlockParam +] diff --git a/src/anthropic/types/document_block_param.py b/src/anthropic/types/document_block_param.py new file mode 100644 index 
00000000..57522e93 --- /dev/null +++ b/src/anthropic/types/document_block_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from .base64_pdf_source_param import Base64PDFSourceParam +from .cache_control_ephemeral_param import CacheControlEphemeralParam + +__all__ = ["DocumentBlockParam"] + + +class DocumentBlockParam(TypedDict, total=False): + source: Required[Base64PDFSourceParam] + + type: Required[Literal["document"]] + + cache_control: Optional[CacheControlEphemeralParam] diff --git a/src/anthropic/types/image_block_param.py b/src/anthropic/types/image_block_param.py index d7f46fa9..bfd8c18e 100644 --- a/src/anthropic/types/image_block_param.py +++ b/src/anthropic/types/image_block_param.py @@ -2,12 +2,13 @@ from __future__ import annotations -from typing import Union +from typing import Union, Optional from typing_extensions import Literal, Required, Annotated, TypedDict from .._types import Base64FileInput from .._utils import PropertyInfo from .._models import set_pydantic_config +from .cache_control_ephemeral_param import CacheControlEphemeralParam __all__ = ["ImageBlockParam", "Source"] @@ -27,3 +28,5 @@ class ImageBlockParam(TypedDict, total=False): source: Required[Source] type: Required[Literal["image"]] + + cache_control: Optional[CacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/message_create_params.py b/src/anthropic/types/message_count_tokens_params.py similarity index 55% rename from src/anthropic/types/beta/prompt_caching/message_create_params.py rename to src/anthropic/types/message_count_tokens_params.py index c95a0bbb..c3afbf36 100644 --- a/src/anthropic/types/beta/prompt_caching/message_create_params.py +++ b/src/anthropic/types/message_count_tokens_params.py @@ -2,43 +2,20 @@ from __future__ import annotations -from typing import List, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from ...model_param import ModelParam -from ...metadata_param import MetadataParam -from ...tool_choice_param import ToolChoiceParam -from ...tool_choice_any_param import ToolChoiceAnyParam -from ...tool_choice_auto_param import ToolChoiceAutoParam -from ...tool_choice_tool_param import ToolChoiceToolParam -from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam -from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam -from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam - -__all__ = [ - "MessageCreateParamsBase", - "Metadata", - "ToolChoice", - "ToolChoiceToolChoiceAuto", - "ToolChoiceToolChoiceAny", - "ToolChoiceToolChoiceTool", - "MessageCreateParamsNonStreaming", - "MessageCreateParamsStreaming", -] - - -class MessageCreateParamsBase(TypedDict, total=False): - max_tokens: Required[int] - """The maximum number of tokens to generate before stopping. - - Note that our models may stop _before_ reaching this maximum. This parameter - only specifies the absolute maximum number of tokens to generate. - - Different models have different maximum values for this parameter. See - [models](https://docs.anthropic.com/en/docs/models-overview) for details. 
- """ +from typing import Union, Iterable +from typing_extensions import Required, TypedDict + +from .tool_param import ToolParam +from .model_param import ModelParam +from .message_param import MessageParam +from .text_block_param import TextBlockParam +from .tool_choice_param import ToolChoiceParam + +__all__ = ["MessageCountTokensParams"] - messages: Required[Iterable[PromptCachingBetaMessageParam]] + +class MessageCountTokensParams(TypedDict, total=False): + messages: Required[Iterable[MessageParam]] """Input messages. Our models are trained to operate on alternating `user` and `assistant` @@ -134,22 +111,7 @@ class MessageCreateParamsBase(TypedDict, total=False): details and options. """ - metadata: MetadataParam - """An object describing metadata about the request.""" - - stop_sequences: List[str] - """Custom text sequences that will cause the model to stop generating. - - Our models will normally stop when they have naturally completed their turn, - which will result in a response `stop_reason` of `"end_turn"`. - - If you want the model to stop generating when it encounters custom strings of - text, you can use the `stop_sequences` parameter. If the model encounters one of - the custom sequences, the response `stop_reason` value will be `"stop_sequence"` - and the response `stop_sequence` value will contain the matched stop sequence. - """ - - system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] + system: Union[str, Iterable[TextBlockParam]] """System prompt. A system prompt is a way of providing context and instructions to Claude, such @@ -157,24 +119,13 @@ class MessageCreateParamsBase(TypedDict, total=False): [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). """ - temperature: float - """Amount of randomness injected into the response. - - Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` - for analytical / multiple choice, and closer to `1.0` for creative and - generative tasks. - - Note that even with `temperature` of `0.0`, the results will not be fully - deterministic. - """ - tool_choice: ToolChoiceParam """How the model should use the provided tools. The model can use a specific tool, any available tool, or decide by itself. """ - tools: Iterable[PromptCachingBetaToolParam] + tools: Iterable[ToolParam] """Definitions of tools that the model may use. If you include `tools` in your API request, the model may return `tool_use` @@ -244,62 +195,3 @@ class MessageCreateParamsBase(TypedDict, total=False): See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. """ - - top_k: int - """Only sample from the top K options for each subsequent token. - - Used to remove "long tail" low probability responses. - [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). - - Recommended for advanced use cases only. You usually only need to use - `temperature`. - """ - - top_p: float - """Use nucleus sampling. - - In nucleus sampling, we compute the cumulative distribution over all the options - for each subsequent token in decreasing probability order and cut it off once it - reaches a particular probability specified by `top_p`. You should either alter - `temperature` or `top_p`, but not both. - - Recommended for advanced use cases only. You usually only need to use - `temperature`. 
- """ - - -Metadata: TypeAlias = MetadataParam -"""This is deprecated, `MetadataParam` should be used instead""" - -ToolChoice: TypeAlias = ToolChoiceParam -"""This is deprecated, `ToolChoiceParam` should be used instead""" - -ToolChoiceToolChoiceAuto: TypeAlias = ToolChoiceAutoParam -"""This is deprecated, `ToolChoiceAutoParam` should be used instead""" - -ToolChoiceToolChoiceAny: TypeAlias = ToolChoiceAnyParam -"""This is deprecated, `ToolChoiceAnyParam` should be used instead""" - -ToolChoiceToolChoiceTool: TypeAlias = ToolChoiceToolParam -"""This is deprecated, `ToolChoiceToolParam` should be used instead""" - - -class MessageCreateParamsNonStreaming(MessageCreateParamsBase, total=False): - stream: Literal[False] - """Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - """ - - -class MessageCreateParamsStreaming(MessageCreateParamsBase): - stream: Required[Literal[True]] - """Whether to incrementally stream the response using server-sent events. - - See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for - details. - """ - - -MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming] diff --git a/src/anthropic/types/message_param.py b/src/anthropic/types/message_param.py index 89921c61..811fc7b5 100644 --- a/src/anthropic/types/message_param.py +++ b/src/anthropic/types/message_param.py @@ -8,6 +8,7 @@ from .content_block import ContentBlock from .text_block_param import TextBlockParam from .image_block_param import ImageBlockParam +from .document_block_param import DocumentBlockParam from .tool_use_block_param import ToolUseBlockParam from .tool_result_block_param import ToolResultBlockParam @@ -17,7 +18,17 @@ class MessageParam(TypedDict, total=False): content: Required[ Union[ - str, Iterable[Union[TextBlockParam, ImageBlockParam, ToolUseBlockParam, ToolResultBlockParam, ContentBlock]] + str, + Iterable[ + Union[ + TextBlockParam, + ImageBlockParam, + ToolUseBlockParam, + ToolResultBlockParam, + DocumentBlockParam, + ContentBlock, + ] + ], ] ] diff --git a/src/anthropic/types/message_tokens_count.py b/src/anthropic/types/message_tokens_count.py new file mode 100644 index 00000000..d570019f --- /dev/null +++ b/src/anthropic/types/message_tokens_count.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel + +__all__ = ["MessageTokensCount"] + + +class MessageTokensCount(BaseModel): + input_tokens: int + """ + The total number of tokens across the provided list of messages, system prompt, + and tools. + """ diff --git a/src/anthropic/types/messages/__init__.py b/src/anthropic/types/messages/__init__.py new file mode 100644 index 00000000..c316f0ec --- /dev/null +++ b/src/anthropic/types/messages/__init__.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
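The `MessageCountTokensParams` / `MessageTokensCount` pair above backs a token-counting endpoint. A minimal sketch of how they might be used, assuming the accompanying resource method is exposed as `client.messages.count_tokens` (the resource change itself is not shown in this hunk):

```python
from anthropic import Anthropic

client = Anthropic()

# Count input tokens without creating a message; the request shape mirrors
# MessageCountTokensParams (messages, model, optional system/tools/tool_choice).
count = client.messages.count_tokens(
    model="claude-3-5-sonnet-20241022",
    system="You are a terse assistant.",
    messages=[{"role": "user", "content": "Hello, world"}],
)

print(count.input_tokens)  # MessageTokensCount.input_tokens
```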
+ +from __future__ import annotations + +from .message_batch import MessageBatch as MessageBatch +from .batch_list_params import BatchListParams as BatchListParams +from .batch_create_params import BatchCreateParams as BatchCreateParams +from .message_batch_result import MessageBatchResult as MessageBatchResult +from .message_batch_errored_result import MessageBatchErroredResult as MessageBatchErroredResult +from .message_batch_expired_result import MessageBatchExpiredResult as MessageBatchExpiredResult +from .message_batch_request_counts import MessageBatchRequestCounts as MessageBatchRequestCounts +from .message_batch_canceled_result import MessageBatchCanceledResult as MessageBatchCanceledResult +from .message_batch_succeeded_result import MessageBatchSucceededResult as MessageBatchSucceededResult +from .message_batch_individual_response import MessageBatchIndividualResponse as MessageBatchIndividualResponse diff --git a/src/anthropic/types/messages/batch_create_params.py b/src/anthropic/types/messages/batch_create_params.py new file mode 100644 index 00000000..a82a5ff0 --- /dev/null +++ b/src/anthropic/types/messages/batch_create_params.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Required, TypedDict + +from ..message_create_params import MessageCreateParamsNonStreaming + +__all__ = ["BatchCreateParams", "Request"] + + +class BatchCreateParams(TypedDict, total=False): + requests: Required[Iterable[Request]] + """List of requests for prompt completion. + + Each is an individual request to create a Message. + """ + + +class Request(TypedDict, total=False): + custom_id: Required[str] + """Developer-provided ID created for each request in a Message Batch. + + Useful for matching results to requests, as results may be given out of request + order. + + Must be unique for each request within the Message Batch. + """ + + params: Required[MessageCreateParamsNonStreaming] + """Messages API creation parameters for the individual request. + + See the [Messages API reference](/en/api/messages) for full documentation on + available parameters. + """ diff --git a/src/anthropic/types/messages/batch_list_params.py b/src/anthropic/types/messages/batch_list_params.py new file mode 100644 index 00000000..7b290a77 --- /dev/null +++ b/src/anthropic/types/messages/batch_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately after this object. + """ + + before_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately before this object. + """ + + limit: int + """Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + """ diff --git a/src/anthropic/types/messages/message_batch.py b/src/anthropic/types/messages/message_batch.py new file mode 100644 index 00000000..a03e73e1 --- /dev/null +++ b/src/anthropic/types/messages/message_batch.py @@ -0,0 +1,77 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from datetime import datetime +from typing_extensions import Literal + +from ..._models import BaseModel +from .message_batch_request_counts import MessageBatchRequestCounts + +__all__ = ["MessageBatch"] + + +class MessageBatch(BaseModel): + id: str + """Unique object identifier. + + The format and length of IDs may change over time. + """ + + archived_at: Optional[datetime] = None + """ + RFC 3339 datetime string representing the time at which the Message Batch was + archived and its results became unavailable. + """ + + cancel_initiated_at: Optional[datetime] = None + """ + RFC 3339 datetime string representing the time at which cancellation was + initiated for the Message Batch. Specified only if cancellation was initiated. + """ + + created_at: datetime + """ + RFC 3339 datetime string representing the time at which the Message Batch was + created. + """ + + ended_at: Optional[datetime] = None + """ + RFC 3339 datetime string representing the time at which processing for the + Message Batch ended. Specified only once processing ends. + + Processing ends when every request in a Message Batch has either succeeded, + errored, canceled, or expired. + """ + + expires_at: datetime + """ + RFC 3339 datetime string representing the time at which the Message Batch will + expire and end processing, which is 24 hours after creation. + """ + + processing_status: Literal["in_progress", "canceling", "ended"] + """Processing status of the Message Batch.""" + + request_counts: MessageBatchRequestCounts + """Tallies requests within the Message Batch, categorized by their status. + + Requests start as `processing` and move to one of the other statuses only once + processing of the entire batch ends. The sum of all values always matches the + total number of requests in the batch. + """ + + results_url: Optional[str] = None + """URL to a `.jsonl` file containing the results of the Message Batch requests. + + Specified only once processing ends. + + Results in the file are not guaranteed to be in the same order as requests. Use + the `custom_id` field to match results to requests. + """ + + type: Literal["message_batch"] + """Object type. + + For Message Batches, this is always `"message_batch"`. + """ diff --git a/src/anthropic/types/messages/message_batch_canceled_result.py b/src/anthropic/types/messages/message_batch_canceled_result.py new file mode 100644 index 00000000..9826aa91 --- /dev/null +++ b/src/anthropic/types/messages/message_batch_canceled_result.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["MessageBatchCanceledResult"] + + +class MessageBatchCanceledResult(BaseModel): + type: Literal["canceled"] diff --git a/src/anthropic/types/messages/message_batch_errored_result.py b/src/anthropic/types/messages/message_batch_errored_result.py new file mode 100644 index 00000000..5f890bfd --- /dev/null +++ b/src/anthropic/types/messages/message_batch_errored_result.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel +from ..shared.error_response import ErrorResponse + +__all__ = ["MessageBatchErroredResult"] + + +class MessageBatchErroredResult(BaseModel): + error: ErrorResponse + + type: Literal["errored"] diff --git a/src/anthropic/types/messages/message_batch_expired_result.py b/src/anthropic/types/messages/message_batch_expired_result.py new file mode 100644 index 00000000..ab9964e7 --- /dev/null +++ b/src/anthropic/types/messages/message_batch_expired_result.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["MessageBatchExpiredResult"] + + +class MessageBatchExpiredResult(BaseModel): + type: Literal["expired"] diff --git a/src/anthropic/types/messages/message_batch_individual_response.py b/src/anthropic/types/messages/message_batch_individual_response.py new file mode 100644 index 00000000..19d4f090 --- /dev/null +++ b/src/anthropic/types/messages/message_batch_individual_response.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel +from .message_batch_result import MessageBatchResult + +__all__ = ["MessageBatchIndividualResponse"] + + +class MessageBatchIndividualResponse(BaseModel): + custom_id: str + """Developer-provided ID created for each request in a Message Batch. + + Useful for matching results to requests, as results may be given out of request + order. + + Must be unique for each request within the Message Batch. + """ + + result: MessageBatchResult + """Processing result for this request. + + Contains a Message output if processing was successful, an error response if + processing failed, or the reason why processing was not attempted, such as + cancellation or expiration. + """ diff --git a/src/anthropic/types/messages/message_batch_request_counts.py b/src/anthropic/types/messages/message_batch_request_counts.py new file mode 100644 index 00000000..04edc3c3 --- /dev/null +++ b/src/anthropic/types/messages/message_batch_request_counts.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel + +__all__ = ["MessageBatchRequestCounts"] + + +class MessageBatchRequestCounts(BaseModel): + canceled: int + """Number of requests in the Message Batch that have been canceled. + + This is zero until processing of the entire Message Batch has ended. + """ + + errored: int + """Number of requests in the Message Batch that encountered an error. + + This is zero until processing of the entire Message Batch has ended. + """ + + expired: int + """Number of requests in the Message Batch that have expired. + + This is zero until processing of the entire Message Batch has ended. + """ + + processing: int + """Number of requests in the Message Batch that are processing.""" + + succeeded: int + """Number of requests in the Message Batch that have completed successfully. + + This is zero until processing of the entire Message Batch has ended. + """ diff --git a/src/anthropic/types/messages/message_batch_result.py b/src/anthropic/types/messages/message_batch_result.py new file mode 100644 index 00000000..3186f2aa --- /dev/null +++ b/src/anthropic/types/messages/message_batch_result.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
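Together these types model the Message Batches API. A hedged usage sketch, reusing the request shape exercised by the batch tests later in this patch; the `batches.results()` helper used at the end is an assumption and does not appear in this diff:

```python
import time

from anthropic import Anthropic

client = Anthropic()

batch = client.messages.batches.create(
    requests=[
        {
            "custom_id": "my-custom-id-1",
            "params": {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": "Hello, world"}],
            },
        }
    ],
)

# Poll until every request has succeeded, errored, been canceled, or expired.
while batch.processing_status != "ended":
    time.sleep(60)
    batch = client.messages.batches.retrieve(batch.id)

# Assumed helper for reading the `.jsonl` results; match entries to requests
# via `custom_id` and branch on the discriminated `result.type`.
for entry in client.messages.batches.results(batch.id):
    if entry.result.type == "succeeded":
        print(entry.custom_id, entry.result.message.content)
    elif entry.result.type == "errored":
        print(entry.custom_id, entry.result.error)
```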
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .message_batch_errored_result import MessageBatchErroredResult +from .message_batch_expired_result import MessageBatchExpiredResult +from .message_batch_canceled_result import MessageBatchCanceledResult +from .message_batch_succeeded_result import MessageBatchSucceededResult + +__all__ = ["MessageBatchResult"] + +MessageBatchResult: TypeAlias = Annotated[ + Union[ + MessageBatchSucceededResult, MessageBatchErroredResult, MessageBatchCanceledResult, MessageBatchExpiredResult + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/anthropic/types/messages/message_batch_succeeded_result.py b/src/anthropic/types/messages/message_batch_succeeded_result.py new file mode 100644 index 00000000..1cc454a4 --- /dev/null +++ b/src/anthropic/types/messages/message_batch_succeeded_result.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..message import Message +from ..._models import BaseModel + +__all__ = ["MessageBatchSucceededResult"] + + +class MessageBatchSucceededResult(BaseModel): + message: Message + + type: Literal["succeeded"] diff --git a/src/anthropic/types/model_info.py b/src/anthropic/types/model_info.py new file mode 100644 index 00000000..0e3945fe --- /dev/null +++ b/src/anthropic/types/model_info.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ModelInfo"] + + +class ModelInfo(BaseModel): + id: str + """Unique model identifier.""" + + created_at: datetime + """RFC 3339 datetime string representing the time at which the model was released. + + May be set to an epoch value if the release date is unknown. + """ + + display_name: str + """A human-readable name for the model.""" + + type: Literal["model"] + """Object type. + + For Models, this is always `"model"`. + """ diff --git a/src/anthropic/types/model_list_params.py b/src/anthropic/types/model_list_params.py new file mode 100644 index 00000000..b16d22a3 --- /dev/null +++ b/src/anthropic/types/model_list_params.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["ModelListParams"] + + +class ModelListParams(TypedDict, total=False): + after_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately after this object. + """ + + before_id: str + """ID of the object to use as a cursor for pagination. + + When provided, returns the page of results immediately before this object. + """ + + limit: int + """Number of items to return per page. + + Defaults to `20`. Ranges from `1` to `1000`. + """ diff --git a/src/anthropic/types/raw_content_block_delta_event.py b/src/anthropic/types/raw_content_block_delta_event.py index b384fbd3..8785197f 100644 --- a/src/anthropic/types/raw_content_block_delta_event.py +++ b/src/anthropic/types/raw_content_block_delta_event.py @@ -1,16 +1,15 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
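`ModelInfo` and `ModelListParams` introduce a paginated Models listing. A sketch under the assumption that the matching resource is exposed as `client.models`; the tests added later in this patch exercise the `client.beta.models` variant:

```python
from anthropic import Anthropic

client = Anthropic()

# The returned page auto-paginates on iteration; `after_id` / `before_id` /
# `limit` follow ModelListParams.
for model in client.models.list(limit=20):
    print(model.id, model.display_name, model.created_at)
```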
from typing import Union -from typing_extensions import Literal, Annotated, TypeAlias +from typing_extensions import Literal, TypeAlias -from .._utils import PropertyInfo from .._models import BaseModel from .text_delta import TextDelta from .input_json_delta import InputJSONDelta __all__ = ["RawContentBlockDeltaEvent", "Delta"] -Delta: TypeAlias = Annotated[Union[TextDelta, InputJSONDelta], PropertyInfo(discriminator="type")] +Delta: TypeAlias = Union[TextDelta, InputJSONDelta] class RawContentBlockDeltaEvent(BaseModel): diff --git a/src/anthropic/types/shared/__init__.py b/src/anthropic/types/shared/__init__.py new file mode 100644 index 00000000..178643b6 --- /dev/null +++ b/src/anthropic/types/shared/__init__.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .error_object import ErrorObject as ErrorObject +from .billing_error import BillingError as BillingError +from .error_response import ErrorResponse as ErrorResponse +from .not_found_error import NotFoundError as NotFoundError +from .api_error_object import APIErrorObject as APIErrorObject +from .overloaded_error import OverloadedError as OverloadedError +from .permission_error import PermissionError as PermissionError +from .rate_limit_error import RateLimitError as RateLimitError +from .authentication_error import AuthenticationError as AuthenticationError +from .gateway_timeout_error import GatewayTimeoutError as GatewayTimeoutError +from .invalid_request_error import InvalidRequestError as InvalidRequestError diff --git a/src/anthropic/types/shared/api_error_object.py b/src/anthropic/types/shared/api_error_object.py new file mode 100644 index 00000000..dd92bead --- /dev/null +++ b/src/anthropic/types/shared/api_error_object.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["APIErrorObject"] + + +class APIErrorObject(BaseModel): + message: str + + type: Literal["api_error"] diff --git a/src/anthropic/types/shared/authentication_error.py b/src/anthropic/types/shared/authentication_error.py new file mode 100644 index 00000000..f777f5c8 --- /dev/null +++ b/src/anthropic/types/shared/authentication_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["AuthenticationError"] + + +class AuthenticationError(BaseModel): + message: str + + type: Literal["authentication_error"] diff --git a/src/anthropic/types/shared/billing_error.py b/src/anthropic/types/shared/billing_error.py new file mode 100644 index 00000000..26be12bb --- /dev/null +++ b/src/anthropic/types/shared/billing_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["BillingError"] + + +class BillingError(BaseModel): + message: str + + type: Literal["billing_error"] diff --git a/src/anthropic/types/shared/error_object.py b/src/anthropic/types/shared/error_object.py new file mode 100644 index 00000000..086db503 --- /dev/null +++ b/src/anthropic/types/shared/error_object.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .billing_error import BillingError +from .not_found_error import NotFoundError +from .api_error_object import APIErrorObject +from .overloaded_error import OverloadedError +from .permission_error import PermissionError +from .rate_limit_error import RateLimitError +from .authentication_error import AuthenticationError +from .gateway_timeout_error import GatewayTimeoutError +from .invalid_request_error import InvalidRequestError + +__all__ = ["ErrorObject"] + +ErrorObject: TypeAlias = Annotated[ + Union[ + InvalidRequestError, + AuthenticationError, + BillingError, + PermissionError, + NotFoundError, + RateLimitError, + GatewayTimeoutError, + APIErrorObject, + OverloadedError, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/anthropic/types/shared/error_response.py b/src/anthropic/types/shared/error_response.py new file mode 100644 index 00000000..97034923 --- /dev/null +++ b/src/anthropic/types/shared/error_response.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .error_object import ErrorObject + +__all__ = ["ErrorResponse"] + + +class ErrorResponse(BaseModel): + error: ErrorObject + + type: Literal["error"] diff --git a/src/anthropic/types/shared/gateway_timeout_error.py b/src/anthropic/types/shared/gateway_timeout_error.py new file mode 100644 index 00000000..908aa12f --- /dev/null +++ b/src/anthropic/types/shared/gateway_timeout_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["GatewayTimeoutError"] + + +class GatewayTimeoutError(BaseModel): + message: str + + type: Literal["timeout_error"] diff --git a/src/anthropic/types/shared/invalid_request_error.py b/src/anthropic/types/shared/invalid_request_error.py new file mode 100644 index 00000000..ee5befc0 --- /dev/null +++ b/src/anthropic/types/shared/invalid_request_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InvalidRequestError"] + + +class InvalidRequestError(BaseModel): + message: str + + type: Literal["invalid_request_error"] diff --git a/src/anthropic/types/shared/not_found_error.py b/src/anthropic/types/shared/not_found_error.py new file mode 100644 index 00000000..43e826fb --- /dev/null +++ b/src/anthropic/types/shared/not_found_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["NotFoundError"] + + +class NotFoundError(BaseModel): + message: str + + type: Literal["not_found_error"] diff --git a/src/anthropic/types/shared/overloaded_error.py b/src/anthropic/types/shared/overloaded_error.py new file mode 100644 index 00000000..74ee8373 --- /dev/null +++ b/src/anthropic/types/shared/overloaded_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
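These shared models describe the JSON error envelope that `ErrorResponse` wraps and that the `ErrorObject` union discriminates on `type`. At runtime the SDK surfaces HTTP failures as exceptions rather than returning these models directly; a small illustration, assuming only the existing `APIStatusError` interface:

```python
import anthropic

client = anthropic.Anthropic()

try:
    client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[{"role": "user", "content": "Hello, world"}],
    )
except anthropic.APIStatusError as err:
    # `err.body` carries the parsed envelope; its nested `error.type` value maps
    # onto the union above (e.g. "billing_error", "timeout_error", "overloaded_error").
    print(err.status_code, err.body)
```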
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["OverloadedError"] + + +class OverloadedError(BaseModel): + message: str + + type: Literal["overloaded_error"] diff --git a/src/anthropic/types/shared/permission_error.py b/src/anthropic/types/shared/permission_error.py new file mode 100644 index 00000000..48eb3546 --- /dev/null +++ b/src/anthropic/types/shared/permission_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["PermissionError"] + + +class PermissionError(BaseModel): + message: str + + type: Literal["permission_error"] diff --git a/src/anthropic/types/shared/rate_limit_error.py b/src/anthropic/types/shared/rate_limit_error.py new file mode 100644 index 00000000..3fa065ac --- /dev/null +++ b/src/anthropic/types/shared/rate_limit_error.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["RateLimitError"] + + +class RateLimitError(BaseModel): + message: str + + type: Literal["rate_limit_error"] diff --git a/src/anthropic/types/text_block_param.py b/src/anthropic/types/text_block_param.py index 825d1660..0b27ee2b 100644 --- a/src/anthropic/types/text_block_param.py +++ b/src/anthropic/types/text_block_param.py @@ -2,8 +2,11 @@ from __future__ import annotations +from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .cache_control_ephemeral_param import CacheControlEphemeralParam + __all__ = ["TextBlockParam"] @@ -11,3 +14,5 @@ class TextBlockParam(TypedDict, total=False): text: Required[str] type: Required[Literal["text"]] + + cache_control: Optional[CacheControlEphemeralParam] diff --git a/src/anthropic/types/tool_param.py b/src/anthropic/types/tool_param.py index 35a95516..3a6ab1dd 100644 --- a/src/anthropic/types/tool_param.py +++ b/src/anthropic/types/tool_param.py @@ -5,6 +5,8 @@ from typing import Dict, Union, Optional from typing_extensions import Literal, Required, TypeAlias, TypedDict +from .cache_control_ephemeral_param import CacheControlEphemeralParam + __all__ = ["ToolParam", "InputSchema"] @@ -31,6 +33,8 @@ class ToolParam(TypedDict, total=False): This is how the tool will be called by the model and in tool_use blocks. """ + cache_control: Optional[CacheControlEphemeralParam] + description: str """Description of what this tool does. 
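With `cache_control` now accepted directly on the standard block and tool params, prompt caching no longer needs the separate `beta.prompt_caching` namespace that this patch removes. A minimal sketch, mirroring the shapes used in the deleted beta tests:

```python
from anthropic import Anthropic

client = Anthropic()

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "Today's date is 2024-06-01.",
            "cache_control": {"type": "ephemeral"},
        }
    ],
    tools=[
        {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "input_schema": {"type": "object", "properties": {}},
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Hello, world"}],
)

# Cache activity shows up on the standard Usage object via the
# cache_creation_input_tokens / cache_read_input_tokens fields added below.
print(message.usage)
```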
diff --git a/src/anthropic/types/tool_result_block_param.py b/src/anthropic/types/tool_result_block_param.py index 7c212e19..b6ca8aa9 100644 --- a/src/anthropic/types/tool_result_block_param.py +++ b/src/anthropic/types/tool_result_block_param.py @@ -2,11 +2,12 @@ from __future__ import annotations -from typing import Union, Iterable +from typing import Union, Iterable, Optional from typing_extensions import Literal, Required, TypeAlias, TypedDict from .text_block_param import TextBlockParam from .image_block_param import ImageBlockParam +from .cache_control_ephemeral_param import CacheControlEphemeralParam __all__ = ["ToolResultBlockParam", "Content"] @@ -18,6 +19,8 @@ class ToolResultBlockParam(TypedDict, total=False): type: Required[Literal["tool_result"]] + cache_control: Optional[CacheControlEphemeralParam] + content: Union[str, Iterable[Content]] is_error: bool diff --git a/src/anthropic/types/tool_use_block_param.py b/src/anthropic/types/tool_use_block_param.py index e0218476..cc285079 100644 --- a/src/anthropic/types/tool_use_block_param.py +++ b/src/anthropic/types/tool_use_block_param.py @@ -2,8 +2,11 @@ from __future__ import annotations +from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .cache_control_ephemeral_param import CacheControlEphemeralParam + __all__ = ["ToolUseBlockParam"] @@ -15,3 +18,5 @@ class ToolUseBlockParam(TypedDict, total=False): name: Required[str] type: Required[Literal["tool_use"]] + + cache_control: Optional[CacheControlEphemeralParam] diff --git a/src/anthropic/types/usage.py b/src/anthropic/types/usage.py index 88f1ec84..b4f817bd 100644 --- a/src/anthropic/types/usage.py +++ b/src/anthropic/types/usage.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from .._models import BaseModel @@ -7,6 +8,12 @@ class Usage(BaseModel): + cache_creation_input_tokens: Optional[int] = None + """The number of input tokens used to create the cache entry.""" + + cache_read_input_tokens: Optional[int] = None + """The number of input tokens read from the cache.""" + input_tokens: int """The number of input tokens which were used.""" diff --git a/tests/api_resources/beta/prompt_caching/test_messages.py b/tests/api_resources/beta/prompt_caching/test_messages.py deleted file mode 100644 index 4ecb1624..00000000 --- a/tests/api_resources/beta/prompt_caching/test_messages.py +++ /dev/null @@ -1,442 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from anthropic import Anthropic, AsyncAnthropic -from tests.utils import assert_matches_type -from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestMessages: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create_overload_1(self, client: Anthropic) -> None: - message = client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None: - message = client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, - stop_sequences=["string"], - stream=False, - system=[ - { - "text": "Today's date is 2024-06-01.", - "type": "text", - "cache_control": {"type": "ephemeral"}, - } - ], - temperature=1, - tool_choice={ - "type": "auto", - "disable_parallel_tool_use": True, - }, - tools=[ - { - "input_schema": { - "type": "object", - "properties": { - "location": { - "description": "The city and state, e.g. San Francisco, CA", - "type": "string", - }, - "unit": { - "description": "Unit for the output - one of (celsius, fahrenheit)", - "type": "string", - }, - }, - }, - "name": "x", - "cache_control": {"type": "ephemeral"}, - "description": "Get the current weather in a given location", - } - ], - top_k=5, - top_p=0.7, - betas=["string"], - ) - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - def test_raw_response_create_overload_1(self, client: Anthropic) -> None: - response = client.beta.prompt_caching.messages.with_raw_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - def test_streaming_response_create_overload_1(self, client: Anthropic) -> None: - with client.beta.prompt_caching.messages.with_streaming_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - message = response.parse() - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_create_overload_2(self, client: Anthropic) -> None: - message_stream = client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) - message_stream.response.close() - - @parametrize - def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None: - message_stream = 
client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, - stop_sequences=["string"], - system=[ - { - "text": "Today's date is 2024-06-01.", - "type": "text", - "cache_control": {"type": "ephemeral"}, - } - ], - temperature=1, - tool_choice={ - "type": "auto", - "disable_parallel_tool_use": True, - }, - tools=[ - { - "input_schema": { - "type": "object", - "properties": { - "location": { - "description": "The city and state, e.g. San Francisco, CA", - "type": "string", - }, - "unit": { - "description": "Unit for the output - one of (celsius, fahrenheit)", - "type": "string", - }, - }, - }, - "name": "x", - "cache_control": {"type": "ephemeral"}, - "description": "Get the current weather in a given location", - } - ], - top_k=5, - top_p=0.7, - betas=["string"], - ) - message_stream.response.close() - - @parametrize - def test_raw_response_create_overload_2(self, client: Anthropic) -> None: - response = client.beta.prompt_caching.messages.with_raw_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_create_overload_2(self, client: Anthropic) -> None: - with client.beta.prompt_caching.messages.with_streaming_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - -class TestAsyncMessages: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> None: - message = await async_client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - async def test_method_create_with_all_params_overload_1(self, async_client: AsyncAnthropic) -> None: - message = await async_client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, - stop_sequences=["string"], - stream=False, - system=[ - { - "text": "Today's date is 2024-06-01.", - "type": "text", - "cache_control": {"type": "ephemeral"}, - } - ], - temperature=1, - tool_choice={ - "type": "auto", - "disable_parallel_tool_use": True, - }, - tools=[ - { - "input_schema": { - "type": "object", - "properties": { - "location": { - "description": "The city and state, e.g. 
San Francisco, CA", - "type": "string", - }, - "unit": { - "description": "Unit for the output - one of (celsius, fahrenheit)", - "type": "string", - }, - }, - }, - "name": "x", - "cache_control": {"type": "ephemeral"}, - "description": "Get the current weather in a given location", - } - ], - top_k=5, - top_p=0.7, - betas=["string"], - ) - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic) -> None: - response = await async_client.beta.prompt_caching.messages.with_raw_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - message = response.parse() - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - @parametrize - async def test_streaming_response_create_overload_1(self, async_client: AsyncAnthropic) -> None: - async with async_client.beta.prompt_caching.messages.with_streaming_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - message = await response.parse() - assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> None: - message_stream = await async_client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) - await message_stream.response.aclose() - - @parametrize - async def test_method_create_with_all_params_overload_2(self, async_client: AsyncAnthropic) -> None: - message_stream = await async_client.beta.prompt_caching.messages.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, - stop_sequences=["string"], - system=[ - { - "text": "Today's date is 2024-06-01.", - "type": "text", - "cache_control": {"type": "ephemeral"}, - } - ], - temperature=1, - tool_choice={ - "type": "auto", - "disable_parallel_tool_use": True, - }, - tools=[ - { - "input_schema": { - "type": "object", - "properties": { - "location": { - "description": "The city and state, e.g. 
San Francisco, CA", - "type": "string", - }, - "unit": { - "description": "Unit for the output - one of (celsius, fahrenheit)", - "type": "string", - }, - }, - }, - "name": "x", - "cache_control": {"type": "ephemeral"}, - "description": "Get the current weather in a given location", - } - ], - top_k=5, - top_p=0.7, - betas=["string"], - ) - await message_stream.response.aclose() - - @parametrize - async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic) -> None: - response = await async_client.beta.prompt_caching.messages.with_raw_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_create_overload_2(self, async_client: AsyncAnthropic) -> None: - async with async_client.beta.prompt_caching.messages.with_streaming_response.create( - max_tokens=1024, - messages=[ - { - "content": "Hello, world", - "role": "user", - } - ], - model="claude-3-5-sonnet-20241022", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/test_models.py b/tests/api_resources/beta/test_models.py new file mode 100644 index 00000000..17ffd939 --- /dev/null +++ b/tests/api_resources/beta/test_models.py @@ -0,0 +1,167 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from anthropic import Anthropic, AsyncAnthropic +from tests.utils import assert_matches_type +from anthropic.pagination import SyncPage, AsyncPage +from anthropic.types.beta import BetaModelInfo + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestModels: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: Anthropic) -> None: + model = client.beta.models.retrieve( + "model_id", + ) + assert_matches_type(BetaModelInfo, model, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Anthropic) -> None: + response = client.beta.models.with_raw_response.retrieve( + "model_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(BetaModelInfo, model, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Anthropic) -> None: + with client.beta.models.with_streaming_response.retrieve( + "model_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(BetaModelInfo, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Anthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): + client.beta.models.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: Anthropic) -> None: + 
model = client.beta.models.list() + assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: Anthropic) -> None: + model = client.beta.models.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Anthropic) -> None: + response = client.beta.models.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Anthropic) -> None: + with client.beta.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(SyncPage[BetaModelInfo], model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncModels: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None: + model = await async_client.beta.models.retrieve( + "model_id", + ) + assert_matches_type(BetaModelInfo, model, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None: + response = await async_client.beta.models.with_raw_response.retrieve( + "model_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(BetaModelInfo, model, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None: + async with async_client.beta.models.with_streaming_response.retrieve( + "model_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(BetaModelInfo, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): + await async_client.beta.models.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncAnthropic) -> None: + model = await async_client.beta.models.list() + assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None: + model = await async_client.beta.models.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None: + response = await async_client.beta.models.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"]) + + @parametrize + async def 
test_streaming_response_list(self, async_client: AsyncAnthropic) -> None: + async with async_client.beta.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(AsyncPage[BetaModelInfo], model, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/prompt_caching/__init__.py b/tests/api_resources/messages/__init__.py similarity index 100% rename from tests/api_resources/beta/prompt_caching/__init__.py rename to tests/api_resources/messages/__init__.py diff --git a/tests/api_resources/messages/test_batches.py b/tests/api_resources/messages/test_batches.py new file mode 100644 index 00000000..770d3cb7 --- /dev/null +++ b/tests/api_resources/messages/test_batches.py @@ -0,0 +1,469 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from __future__ import annotations + +import os +import json +from typing import Any, cast + +import httpx +import pytest +from respx import MockRouter + +from anthropic import Anthropic, AsyncAnthropic +from tests.utils import assert_matches_type +from anthropic.pagination import SyncPage, AsyncPage +from anthropic.types.messages import MessageBatch + +# pyright: reportDeprecated=false + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestBatches: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: Anthropic) -> None: + batch = client.messages.batches.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: Anthropic) -> None: + response = client.messages.batches.with_raw_response.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: Anthropic) -> None: + with client.messages.batches.with_streaming_response.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: Anthropic) -> None: + batch = client.messages.batches.retrieve( + "message_batch_id", + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Anthropic) -> None: + response = client.messages.batches.with_raw_response.retrieve( + 
"message_batch_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Anthropic) -> None: + with client.messages.batches.with_streaming_response.retrieve( + "message_batch_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Anthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"): + client.messages.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: Anthropic) -> None: + batch = client.messages.batches.list() + assert_matches_type(SyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: Anthropic) -> None: + batch = client.messages.batches.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(SyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Anthropic) -> None: + response = client.messages.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(SyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Anthropic) -> None: + with client.messages.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(SyncPage[MessageBatch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: Anthropic) -> None: + batch = client.messages.batches.cancel( + "message_batch_id", + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: Anthropic) -> None: + response = client.messages.batches.with_raw_response.cancel( + "message_batch_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: Anthropic) -> None: + with client.messages.batches.with_streaming_response.cancel( + "message_batch_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: Anthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"): + client.messages.batches.with_raw_response.cancel( + "", + ) + + @pytest.mark.respx(base_url=base_url) + @pytest.mark.parametrize("client", [False], indirect=True) + def test_method_results(self, 
client: Anthropic, respx_mock: MockRouter) -> None: + respx_mock.get("/v1/messages/batches/message_batch_id").mock( + return_value=httpx.Response(200, json={"results_url": "/v1/messages/batches/message_batch_id/results"}) + ) + respx_mock.get("/v1/messages/batches/message_batch_id/results").mock( + return_value=httpx.Response( + 200, content="\n".join([json.dumps({"foo": "bar"}), json.dumps({"bar": "baz"})]) + ) + ) + results = client.beta.messages.batches.results( + message_batch_id="message_batch_id", + ) + assert results.http_response is not None + assert not results.http_response.is_stream_consumed + + i = -1 + for result in results: + i += 1 + if i == 0: + assert result.to_dict() == {"foo": "bar"} + elif i == 1: + assert result.to_dict() == {"bar": "baz"} + else: + raise RuntimeError(f"iterated too many times, expected 2 times but got {i + 1}") + + assert i == 1 + assert results.http_response.is_stream_consumed + + @parametrize + @pytest.mark.respx(base_url=base_url) + def test_path_params_results(self, client: Anthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"): + client.messages.batches.with_raw_response.results( + "", + ) + + +class TestAsyncBatches: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncAnthropic) -> None: + batch = await async_client.messages.batches.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncAnthropic) -> None: + response = await async_client.messages.batches.with_raw_response.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncAnthropic) -> None: + async with async_client.messages.batches.with_streaming_response.create( + requests=[ + { + "custom_id": "my-custom-id-1", + "params": { + "max_tokens": 1024, + "messages": [ + { + "content": "Hello, world", + "role": "user", + } + ], + "model": "claude-3-5-sonnet-20241022", + }, + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None: + batch = await async_client.messages.batches.retrieve( + "message_batch_id", + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None: + response = await async_client.messages.batches.with_raw_response.retrieve( + "message_batch_id", + ) + + assert response.is_closed is True + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None: + async with async_client.messages.batches.with_streaming_response.retrieve( + "message_batch_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"): + await async_client.messages.batches.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncAnthropic) -> None: + batch = await async_client.messages.batches.list() + assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None: + batch = await async_client.messages.batches.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None: + response = await async_client.messages.batches.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncAnthropic) -> None: + async with async_client.messages.batches.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(AsyncPage[MessageBatch], batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncAnthropic) -> None: + batch = await async_client.messages.batches.cancel( + "message_batch_id", + ) + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncAnthropic) -> None: + response = await async_client.messages.batches.with_raw_response.cancel( + "message_batch_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + batch = response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncAnthropic) -> None: + async with async_client.messages.batches.with_streaming_response.cancel( + "message_batch_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + batch = await response.parse() + assert_matches_type(MessageBatch, batch, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncAnthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for 
`message_batch_id` but received ''"): + await async_client.messages.batches.with_raw_response.cancel( + "", + ) + + @pytest.mark.respx(base_url=base_url) + @pytest.mark.parametrize("async_client", [False], indirect=True) + async def test_method_results(self, async_client: AsyncAnthropic, respx_mock: MockRouter) -> None: + respx_mock.get("/v1/messages/batches/message_batch_id").mock( + return_value=httpx.Response(200, json={"results_url": "/v1/messages/batches/message_batch_id/results"}) + ) + respx_mock.get("/v1/messages/batches/message_batch_id/results").mock( + return_value=httpx.Response( + 200, content="\n".join([json.dumps({"foo": "bar"}), json.dumps({"bar": "baz"})]) + ) + ) + results = await async_client.beta.messages.batches.results( + message_batch_id="message_batch_id", + ) + assert results.http_response is not None + assert not results.http_response.is_stream_consumed + + i = -1 + async for result in results: + i += 1 + if i == 0: + assert result.to_dict() == {"foo": "bar"} + elif i == 1: + assert result.to_dict() == {"bar": "baz"} + else: + raise RuntimeError(f"iterated too many times, expected 2 times but got {i + 1}") + + assert i == 1 + assert results.http_response.is_stream_consumed + + @parametrize + @pytest.mark.respx(base_url=base_url) + async def test_path_params_results(self, async_client: AsyncAnthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_batch_id` but received ''"): + await async_client.messages.batches.with_raw_response.results( + "", + ) diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py index 533918dd..1c1a0df0 100644 --- a/tests/api_resources/test_messages.py +++ b/tests/api_resources/test_messages.py @@ -9,7 +9,10 @@ from anthropic import Anthropic, AsyncAnthropic from tests.utils import assert_matches_type -from anthropic.types import Message +from anthropic.types import ( + Message, + MessageTokensCount, +) from anthropic.resources.messages import DEPRECATED_MODELS base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -50,6 +53,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No { "text": "Today's date is 2024-06-01.", "type": "text", + "cache_control": {"type": "ephemeral"}, } ], temperature=1, @@ -73,6 +77,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No }, }, "name": "x", + "cache_control": {"type": "ephemeral"}, "description": "Get the current weather in a given location", } ], @@ -152,6 +157,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No { "text": "Today's date is 2024-06-01.", "type": "text", + "cache_control": {"type": "ephemeral"}, } ], temperature=1, @@ -175,6 +181,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No }, }, "name": "x", + "cache_control": {"type": "ephemeral"}, "description": "Get the current weather in a given location", } ], @@ -232,6 +239,99 @@ def test_deprecated_model_warning(self, client: Anthropic) -> None: model=deprecated_model, ) + @parametrize + def test_method_count_tokens(self, client: Anthropic) -> None: + message = client.messages.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + def test_method_count_tokens_with_all_params(self, client: Anthropic) -> None: + message = client.messages.count_tokens( + messages=[ + { + "content": 
"string", + "role": "user", + } + ], + model="string", + system=[ + { + "text": "Today's date is 2024-06-01.", + "type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + } + ], + ) + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + def test_raw_response_count_tokens(self, client: Anthropic) -> None: + response = client.messages.with_raw_response.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + def test_streaming_response_count_tokens(self, client: Anthropic) -> None: + with client.messages.with_streaming_response.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(MessageTokensCount, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncMessages: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -268,6 +368,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn { "text": "Today's date is 2024-06-01.", "type": "text", + "cache_control": {"type": "ephemeral"}, } ], temperature=1, @@ -291,6 +392,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn }, }, "name": "x", + "cache_control": {"type": "ephemeral"}, "description": "Get the current weather in a given location", } ], @@ -370,6 +472,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn { "text": "Today's date is 2024-06-01.", "type": "text", + "cache_control": {"type": "ephemeral"}, } ], temperature=1, @@ -393,6 +496,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn }, }, "name": "x", + "cache_control": {"type": "ephemeral"}, "description": "Get the current weather in a given location", } ], @@ -449,3 +553,96 @@ async def test_deprecated_model_warning(self, async_client: AsyncAnthropic) -> N messages=[{"role": "user", "content": "Hello"}], model=deprecated_model, ) + + @parametrize + async def test_method_count_tokens(self, async_client: AsyncAnthropic) -> None: + message = await async_client.messages.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + async def test_method_count_tokens_with_all_params(self, async_client: AsyncAnthropic) -> None: + message = await async_client.messages.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + system=[ + { + "text": "Today's date is 2024-06-01.", + 
"type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True, + }, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + } + ], + ) + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + async def test_raw_response_count_tokens(self, async_client: AsyncAnthropic) -> None: + response = await async_client.messages.with_raw_response.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(MessageTokensCount, message, path=["response"]) + + @parametrize + async def test_streaming_response_count_tokens(self, async_client: AsyncAnthropic) -> None: + async with async_client.messages.with_streaming_response.count_tokens( + messages=[ + { + "content": "string", + "role": "user", + } + ], + model="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(MessageTokensCount, message, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py new file mode 100644 index 00000000..34b4961a --- /dev/null +++ b/tests/api_resources/test_models.py @@ -0,0 +1,167 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from anthropic import Anthropic, AsyncAnthropic +from tests.utils import assert_matches_type +from anthropic.types import ModelInfo +from anthropic.pagination import SyncPage, AsyncPage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestModels: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: Anthropic) -> None: + model = client.models.retrieve( + "model_id", + ) + assert_matches_type(ModelInfo, model, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Anthropic) -> None: + response = client.models.with_raw_response.retrieve( + "model_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(ModelInfo, model, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Anthropic) -> None: + with client.models.with_streaming_response.retrieve( + "model_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(ModelInfo, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Anthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): + client.models.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: Anthropic) -> None: + model = client.models.list() + assert_matches_type(SyncPage[ModelInfo], model, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: Anthropic) -> None: + model = client.models.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(SyncPage[ModelInfo], model, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Anthropic) -> None: + response = client.models.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(SyncPage[ModelInfo], model, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Anthropic) -> None: + with client.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(SyncPage[ModelInfo], model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncModels: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncAnthropic) -> None: + model = await async_client.models.retrieve( + "model_id", + ) + assert_matches_type(ModelInfo, model, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncAnthropic) -> None: + response = await async_client.models.with_raw_response.retrieve( + "model_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + 
assert_matches_type(ModelInfo, model, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncAnthropic) -> None: + async with async_client.models.with_streaming_response.retrieve( + "model_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(ModelInfo, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncAnthropic) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): + await async_client.models.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncAnthropic) -> None: + model = await async_client.models.list() + assert_matches_type(AsyncPage[ModelInfo], model, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncAnthropic) -> None: + model = await async_client.models.list( + after_id="after_id", + before_id="before_id", + limit=1, + ) + assert_matches_type(AsyncPage[ModelInfo], model, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncAnthropic) -> None: + response = await async_client.models.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(AsyncPage[ModelInfo], model, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncAnthropic) -> None: + async with async_client.models.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(AsyncPage[ModelInfo], model, path=["response"]) + + assert cast(Any, response.is_closed) is True From 45c28dd52a22994ad12fc409dfdbd9036b93f61a Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Mon, 16 Dec 2024 18:29:53 +0000 Subject: [PATCH 16/18] feat(vertex): support token counting --- src/anthropic/lib/vertex/_client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/anthropic/lib/vertex/_client.py b/src/anthropic/lib/vertex/_client.py index 3df48d0c..2c1869f8 100644 --- a/src/anthropic/lib/vertex/_client.py +++ b/src/anthropic/lib/vertex/_client.py @@ -441,7 +441,15 @@ def _prepare_options(input_options: FinalRequestOptions, *, project_id: str | No options.url = f"/projects/{project_id}/locations/{region}/publishers/anthropic/models/{model}:{specifier}" + if options.url in {"/v1/messages/count_tokens", "/v1/messages/count_tokens?beta=true"} and options.method == "post": + if project_id is None: + raise RuntimeError( + "No project_id was given and it could not be resolved from credentials. The client should be instantiated with the `project_id` argument or the `ANTHROPIC_VERTEX_PROJECT_ID` environment variable should be set." 
+ ) + if is_dict(options.json_data): - options.json_data.pop("model", None) + options.json_data.pop("anthropic_version", None) + + options.url = f"/projects/{project_id}/locations/{region}/publishers/anthropic/models/count-tokens:rawPredict" return options From c17d68e63a9447a23cb262a98d539fcfe47935f8 Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Tue, 17 Dec 2024 14:16:26 +0000 Subject: [PATCH 17/18] chore(bedrock/vertex): explicit error for unsupported messages endpoints --- src/anthropic/lib/bedrock/_client.py | 8 +++++++- src/anthropic/lib/vertex/_client.py | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/anthropic/lib/bedrock/_client.py b/src/anthropic/lib/bedrock/_client.py index 89c601d9..9d5eb3bc 100644 --- a/src/anthropic/lib/bedrock/_client.py +++ b/src/anthropic/lib/bedrock/_client.py @@ -14,7 +14,7 @@ from ..._compat import model_copy from ..._version import __version__ from ..._streaming import Stream, AsyncStream -from ..._exceptions import APIStatusError +from ..._exceptions import AnthropicError, APIStatusError from ..._base_client import ( DEFAULT_MAX_RETRIES, BaseClient, @@ -55,6 +55,12 @@ def _prepare_options(input_options: FinalRequestOptions) -> FinalRequestOptions: else: options.url = f"/model/{model}/invoke" + if options.url.startswith('/v1/messages/batches'): + raise AnthropicError('The Batch API is not supported in Bedrock yet') + + if options.url == '/v1/messages/count_tokens': + raise AnthropicError('Token counting is not supported in Bedrock yet') + return options diff --git a/src/anthropic/lib/vertex/_client.py b/src/anthropic/lib/vertex/_client.py index 2c1869f8..d2f49029 100644 --- a/src/anthropic/lib/vertex/_client.py +++ b/src/anthropic/lib/vertex/_client.py @@ -15,7 +15,7 @@ from ..._models import FinalRequestOptions from ..._version import __version__ from ..._streaming import Stream, AsyncStream -from ..._exceptions import APIStatusError +from ..._exceptions import AnthropicError, APIStatusError from ..._base_client import ( DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS, @@ -452,4 +452,7 @@ def _prepare_options(input_options: FinalRequestOptions, *, project_id: str | No options.url = f"/projects/{project_id}/locations/{region}/publishers/anthropic/models/count-tokens:rawPredict" + if options.url.startswith('/v1/messages/batches'): + raise AnthropicError('The Batch API is not supported in the Vertex client yet') + return options From e33d250e47f12d6aceae1847b6468abe9b6fe6df Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:17:17 +0000 Subject: [PATCH 18/18] release: 0.41.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/anthropic/_version.py | 2 +- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 0a40b9d7..ea2682c3 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.40.0" + ".": "0.41.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index eced9b61..643ed3a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +## 0.41.0 (2024-12-17) + +Full Changelog: [v0.40.0...v0.41.0](https://github.com/anthropics/anthropic-sdk-python/compare/v0.40.0...v0.41.0) + +### Features + +* **api:** general availability updates 
([5db8538](https://github.com/anthropics/anthropic-sdk-python/commit/5db8538cca2ab957ccb5460bf3f33636de0a5106)) +* **api:** general availability updates ([#795](https://github.com/anthropics/anthropic-sdk-python/issues/795)) ([c8d5e43](https://github.com/anthropics/anthropic-sdk-python/commit/c8d5e43d00e0e68a68b9ecac15414135750495ff)) +* **vertex:** support token counting ([6c3eded](https://github.com/anthropics/anthropic-sdk-python/commit/6c3ededeb68f878a94998d61d0c78209ec640d0c)) + + +### Bug Fixes + +* **internal:** correct support for TypeAliasType ([2f6ba9e](https://github.com/anthropics/anthropic-sdk-python/commit/2f6ba9e9f827b39b373a4b2904df04fef940001a)) + + +### Chores + +* **api:** update spec version ([#792](https://github.com/anthropics/anthropic-sdk-python/issues/792)) ([f54c1da](https://github.com/anthropics/anthropic-sdk-python/commit/f54c1daf964d0cca09e023bd89adf7d9c97f385d)) +* **bedrock/vertex:** explicit error for unsupported messages endpoints ([c4cf816](https://github.com/anthropics/anthropic-sdk-python/commit/c4cf8164c20081cc75fefbe39db5b76be1c724e1)) +* **internal:** add support for TypeAliasType ([#786](https://github.com/anthropics/anthropic-sdk-python/issues/786)) ([287ebd2](https://github.com/anthropics/anthropic-sdk-python/commit/287ebd2287ca90408999fe6be3b6f8c0295b46ef)) +* **internal:** bump pydantic dependency ([#775](https://github.com/anthropics/anthropic-sdk-python/issues/775)) ([99b4d06](https://github.com/anthropics/anthropic-sdk-python/commit/99b4d06e73cdae3f2c97c304b8c0b64ec4758768)) +* **internal:** bump pyright ([#769](https://github.com/anthropics/anthropic-sdk-python/issues/769)) ([81f7d70](https://github.com/anthropics/anthropic-sdk-python/commit/81f7d70fa85029f86de30ac1701ec39d01dde8f9)) +* **internal:** bump pyright ([#785](https://github.com/anthropics/anthropic-sdk-python/issues/785)) ([44ab333](https://github.com/anthropics/anthropic-sdk-python/commit/44ab3339b7f3860e3a492a1784f247702bea5be0)) +* **internal:** remove some duplicated imports ([#788](https://github.com/anthropics/anthropic-sdk-python/issues/788)) ([576ae9b](https://github.com/anthropics/anthropic-sdk-python/commit/576ae9b83214fd78fb02c420abfc0760270ffba8)) +* **internal:** update spec ([#793](https://github.com/anthropics/anthropic-sdk-python/issues/793)) ([7cffc99](https://github.com/anthropics/anthropic-sdk-python/commit/7cffc992b17c475ceaef90868660c5a536e51624)) +* **internal:** updated imports ([#789](https://github.com/anthropics/anthropic-sdk-python/issues/789)) ([d163c08](https://github.com/anthropics/anthropic-sdk-python/commit/d163c08caa9515fc5f59f284d236b16e7f0adb40)) +* make the `Omit` type public ([#772](https://github.com/anthropics/anthropic-sdk-python/issues/772)) ([4ed0419](https://github.com/anthropics/anthropic-sdk-python/commit/4ed041961b59a7943b00a8e592ead0e962f36174)) +* remove deprecated HTTP client options ([#777](https://github.com/anthropics/anthropic-sdk-python/issues/777)) ([3933368](https://github.com/anthropics/anthropic-sdk-python/commit/3933368e8a54d1f81c9503576e461c3d75292c39)) + + +### Documentation + +* **readme:** example snippet for client context manager ([#791](https://github.com/anthropics/anthropic-sdk-python/issues/791)) ([d0a5f0c](https://github.com/anthropics/anthropic-sdk-python/commit/d0a5f0c0568afcac5680d4c38943c8d634521c06)) +* **readme:** fix http client proxies example ([#778](https://github.com/anthropics/anthropic-sdk-python/issues/778)) 
([df1a549](https://github.com/anthropics/anthropic-sdk-python/commit/df1a5494d2d3be86717c344654ce54e2f97f19be)) +* use latest sonnet in example snippets ([#781](https://github.com/anthropics/anthropic-sdk-python/issues/781)) ([1ad9e4f](https://github.com/anthropics/anthropic-sdk-python/commit/1ad9e4ff559f16760de15e2352a64bb2b3103071)) + ## 0.40.0 (2024-11-28) Full Changelog: [v0.39.0...v0.40.0](https://github.com/anthropics/anthropic-sdk-python/compare/v0.39.0...v0.40.0) diff --git a/pyproject.toml b/pyproject.toml index a2eba320..24a27d91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "anthropic" -version = "0.40.0" +version = "0.41.0" description = "The official Python library for the anthropic API" dynamic = ["readme"] license = "MIT" diff --git a/src/anthropic/_version.py b/src/anthropic/_version.py index a87bfc50..15287dee 100644 --- a/src/anthropic/_version.py +++ b/src/anthropic/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "anthropic" -__version__ = "0.40.0" # x-release-please-version +__version__ = "0.41.0" # x-release-please-version
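
The tests added above cover the two endpoints that reach general availability in 0.41.0: message token counting and the Models API (the Vertex client gains the same `count_tokens` routing in [PATCH 16/18]). A minimal sketch of calling both with the first-party client is shown below; it assumes an `ANTHROPIC_API_KEY` in the environment and reuses the model ID from the test fixtures, and it is illustrative rather than part of this patch series.

    from anthropic import Anthropic

    client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

    # Count input tokens for a prompt without creating a message.
    count = client.messages.count_tokens(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": "Hello, world"}],
    )
    print(count.input_tokens)

    # List available models; the returned page supports iteration with cursor pagination.
    for model in client.models.list(limit=20):
        print(model.id, model.display_name)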