From bd70a33234741fa68c185105e4f53cc0275a2a50 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 00:54:12 +0000 Subject: [PATCH 1/8] feat(api): remove InputAudio from ResponseInputContent Removes the type `InputAudio` from `ResponseInputContent`. This parameter was non-functional and has now been removed. Please note that this is not a feature removal; it was never supported by the Responses API. While this is technically a backward-incompatible change due to the type removal, it reflects the intended behavior and has no functional impact. --- .stats.yml | 4 ++-- src/openai/types/responses/response_input_content.py | 4 +--- src/openai/types/responses/response_input_content_param.py | 5 +---- .../responses/response_input_message_content_list_param.py | 5 +---- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.stats.yml b/.stats.yml index b4309cd4c3..bc4e084f99 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 136 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a3c45d9bd3bb25bf4eaa49b7fb473a00038293dec659ffaa44f624ded884abf4.yml -openapi_spec_hash: 9c20aaf786a0700dabd13d9865481c9e +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml +openapi_spec_hash: 1560717860bba4105936647dde8f618d config_hash: 50ee3382a63c021a9f821a935950e926 diff --git a/src/openai/types/responses/response_input_content.py b/src/openai/types/responses/response_input_content.py index 376b9ffce8..1726909a17 100644 --- a/src/openai/types/responses/response_input_content.py +++ b/src/openai/types/responses/response_input_content.py @@ -6,12 +6,10 @@ from ..._utils import PropertyInfo from .response_input_file import ResponseInputFile from .response_input_text import ResponseInputText -from .response_input_audio import ResponseInputAudio from .response_input_image import ResponseInputImage __all__ = ["ResponseInputContent"] ResponseInputContent: TypeAlias = Annotated[ - Union[ResponseInputText, ResponseInputImage, ResponseInputFile, ResponseInputAudio], - PropertyInfo(discriminator="type"), + Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/responses/response_input_content_param.py b/src/openai/types/responses/response_input_content_param.py index a95e026a53..7791cdfd8e 100644 --- a/src/openai/types/responses/response_input_content_param.py +++ b/src/openai/types/responses/response_input_content_param.py @@ -7,11 +7,8 @@ from .response_input_file_param import ResponseInputFileParam from .response_input_text_param import ResponseInputTextParam -from .response_input_audio_param import ResponseInputAudioParam from .response_input_image_param import ResponseInputImageParam __all__ = ["ResponseInputContentParam"] -ResponseInputContentParam: TypeAlias = Union[ - ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam, ResponseInputAudioParam -] +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] diff --git a/src/openai/types/responses/response_input_message_content_list_param.py b/src/openai/types/responses/response_input_message_content_list_param.py index 8e3778d15a..080613df0d 100644 --- a/src/openai/types/responses/response_input_message_content_list_param.py +++ 
b/src/openai/types/responses/response_input_message_content_list_param.py @@ -7,13 +7,10 @@ from .response_input_file_param import ResponseInputFileParam from .response_input_text_param import ResponseInputTextParam -from .response_input_audio_param import ResponseInputAudioParam from .response_input_image_param import ResponseInputImageParam __all__ = ["ResponseInputMessageContentListParam", "ResponseInputContentParam"] -ResponseInputContentParam: TypeAlias = Union[ - ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam, ResponseInputAudioParam -] +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] ResponseInputMessageContentListParam: TypeAlias = List[ResponseInputContentParam] From d8bb7d6d728c5481de4198eebe668b67803ae14a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 10:42:58 +0000 Subject: [PATCH 2/8] fix(client): close streams without requiring full consumption --- src/openai/_streaming.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index f586de74ff..05c284a2be 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -96,9 +96,8 @@ def __stream__(self) -> Iterator[_T]: yield process_data(data=data, cast_to=cast_to, response=response) - # Ensure the entire stream is consumed - for _sse in iterator: - ... + # As we might not fully consume the response stream, we need to close it explicitly + response.close() def __enter__(self) -> Self: return self @@ -198,9 +197,8 @@ async def __stream__(self) -> AsyncIterator[_T]: yield process_data(data=data, cast_to=cast_to, response=response) - # Ensure the entire stream is consumed - async for _sse in iterator: - ... 
+ # As we might not fully consume the response stream, we need to close it explicitly + await response.aclose() async def __aenter__(self) -> Self: return self From d0370a8d61fc2f710a34d8aad48f649a9683106d Mon Sep 17 00:00:00 2001 From: Dan Martins Date: Wed, 29 Oct 2025 08:53:08 -0400 Subject: [PATCH 3/8] fix(readme): update realtime examples (#2714) --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9311b477a3..21f79312a1 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,9 @@ async def main(): client = AsyncOpenAI() async with client.realtime.connect(model="gpt-realtime") as connection: - await connection.session.update(session={'modalities': ['text']}) + await connection.session.update( + session={"type": "realtime", "output_modalities": ["text"]} + ) await connection.conversation.item.create( item={ @@ -256,10 +258,10 @@ async def main(): await connection.response.create() async for event in connection: - if event.type == 'response.text.delta': + if event.type == "response.output_text.delta": print(event.delta, flush=True, end="") - elif event.type == 'response.text.done': + elif event.type == "response.output_text.done": print() elif event.type == "response.done": From 4f1b691ab4db41aebd397ec41942b43fb0f0743c Mon Sep 17 00:00:00 2001 From: Showmick Das Date: Thu, 30 Oct 2025 05:52:29 -0400 Subject: [PATCH 4/8] fix(uploads): avoid file handle leak --- src/openai/resources/uploads/uploads.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py index 8953256f2a..e8c047bd4f 100644 --- a/src/openai/resources/uploads/uploads.py +++ b/src/openai/resources/uploads/uploads.py @@ -157,9 +157,8 @@ def upload_file_chunked( part = self.parts.create(upload_id=upload.id, data=data) log.info("Uploaded part %s for upload %s", part.id, upload.id) part_ids.append(part.id) - except Exception: + finally: buf.close() - raise return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) @@ -465,9 +464,8 @@ async def upload_file_chunked( part = await self.parts.create(upload_id=upload.id, data=data) log.info("Uploaded part %s for upload %s", part.id, upload.id) part_ids.append(part.id) - except Exception: + finally: buf.close() - raise return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) From 933d23bd8d7809c77e0796becfe052167d44d40a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:05:12 +0000 Subject: [PATCH 5/8] chore(internal/tests): avoid race condition with implicit client cleanup --- tests/test_client.py | 372 +++++++++++++++++++++++-------------------- 1 file changed, 202 insertions(+), 170 deletions(-) diff --git a/tests/test_client.py b/tests/test_client.py index 3287e0e706..e8d62f17f7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -64,51 +64,49 @@ def _get_open_connections(client: OpenAI | AsyncOpenAI) -> int: class TestOpenAI: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - @pytest.mark.respx(base_url=base_url) - def test_raw_response(self, respx_mock: MockRouter) -> None: + def test_raw_response(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert 
response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + def test_raw_response_for_binary(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, client: OpenAI) -> None: + copied = client.copy() + assert id(copied) != id(client) - copied = self.client.copy(api_key="another My API Key") + copied = client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def test_copy_default_options(self, client: OpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(client.timeout, httpx.Timeout) + copied = client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + assert isinstance(client.timeout, httpx.Timeout) def test_copy_default_headers(self) -> None: client = OpenAI( @@ -143,6 +141,7 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + client.close() def test_copy_default_query(self) -> None: client = OpenAI( @@ -180,13 +179,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + client.close() + + def test_copy_signature(self, client: OpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. 
- self.client.__init__, # type: ignore[misc] + client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -197,12 +198,12 @@ def test_copy_signature(self) -> None: assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") - def test_copy_build_request(self) -> None: + def test_copy_build_request(self, client: OpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. build_request(options) @@ -259,14 +260,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + def test_request_timeout(self, client: OpenAI) -> None: + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( - FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(100.0) @@ -277,6 +276,8 @@ def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + client.close() + def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used with httpx.Client(timeout=None) as http_client: @@ -288,6 +289,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + client.close() + # no timeout given to the httpx client should not use the httpx default with httpx.Client() as http_client: client = OpenAI( @@ -298,6 +301,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + client.close() + # explicitly passing the default timeout currently results in it being ignored with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = OpenAI( @@ -308,6 +313,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + client.close() + async def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): async with httpx.AsyncClient() as http_client: @@ -319,14 +326,14 @@ async def test_invalid_http_client(self) -> None: ) def test_default_headers_option(self) -> None: - client = OpenAI( + test_client = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = 
client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = OpenAI( + test_client2 = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -335,10 +342,13 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + test_client.close() + test_client2.close() + def test_validate_headers(self) -> None: client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) options = client._prepare_options(FinalRequestOptions(method="get", url="/foo")) @@ -369,8 +379,10 @@ def test_default_query_option(self) -> None: url = httpx.URL(request.url) assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} - def test_request_extra_json(self) -> None: - request = self.client._build_request( + client.close() + + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -381,7 +393,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -392,7 +404,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -403,8 +415,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -414,7 +426,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -425,8 +437,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -439,7 +451,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -453,7 +465,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # 
`extra_query` takes priority over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -496,7 +508,7 @@ def test_multipart_repeating_array(self, client: OpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - def test_basic_union_response(self, respx_mock: MockRouter) -> None: + def test_basic_union_response(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model1(BaseModel): name: str @@ -505,12 +517,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + def test_union_response_different_types(self, respx_mock: MockRouter, client: OpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -521,18 +533,18 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter, client: OpenAI) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -548,7 +560,7 @@ class Model(BaseModel): ) ) - response = self.client.get("/foo", cast_to=Model) + response = client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 @@ -560,6 +572,8 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" + client.close() + def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = OpenAI(api_key=api_key, _strict_response_validation=True) @@ -587,6 +601,7 @@ def test_base_url_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -610,6 +625,7 @@ def test_base_url_no_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -633,35 +649,36 @@ def test_absolute_request_url(self, client: OpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + client.close() def test_copied_client_does_not_close_http(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = 
client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied - assert not client.is_closed() + assert not test_client.is_closed() def test_client_context_manager(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - with client as c2: - assert c2 is client + test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + def test_client_response_validation_error(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - self.client.get("/foo", cast_to=Model) + client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -670,13 +687,13 @@ def test_client_max_retries_validation(self) -> None: OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None)) @pytest.mark.respx(base_url=base_url) - def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + def test_default_stream_cls(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = self.client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) + stream = client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) assert isinstance(stream, Stream) stream.response.close() @@ -692,11 +709,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): strict_client.get("/foo", cast_to=Model) - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = client.get("/foo", cast_to=Model) + response = non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + strict_client.close() + non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -719,9 +739,9 @@ class Model(BaseModel): ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, client: OpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) @@ -743,7 +763,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, clien model="gpt-4o", ).__enter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @@ 
-760,7 +780,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client ], model="gpt-4o", ).__enter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @@ -919,28 +939,26 @@ def test_default_client_creation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - def test_follow_redirects(self, respx_mock: MockRouter) -> None: + def test_follow_redirects(self, respx_mock: MockRouter, client: OpenAI) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None: + def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: OpenAI) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: - self.client.post( - "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response - ) + client.post("/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response) assert exc_info.value.response.status_code == 302 assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected" @@ -1003,55 +1021,51 @@ def test_copy_auth(self) -> None: class TestAsyncOpenAI: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response(self, respx_mock: MockRouter) -> None: + async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, async_client: AsyncOpenAI) -> None: + copied = async_client.copy() + 
assert id(copied) != id(async_client) - copied = self.client.copy(api_key="another My API Key") + copied = async_client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert async_client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def test_copy_default_options(self, async_client: AsyncOpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = async_client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert async_client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(async_client.timeout, httpx.Timeout) + copied = async_client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + assert isinstance(async_client.timeout, httpx.Timeout) - def test_copy_default_headers(self) -> None: + async def test_copy_default_headers(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) @@ -1084,8 +1098,9 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + await client.close() - def test_copy_default_query(self) -> None: + async def test_copy_default_query(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"foo": "bar"} ) @@ -1121,13 +1136,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + await client.close() + + def test_copy_signature(self, async_client: AsyncOpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. - self.client.__init__, # type: ignore[misc] + async_client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(async_client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -1138,12 +1155,12 @@ def test_copy_signature(self) -> None: assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") - def test_copy_build_request(self) -> None: + def test_copy_build_request(self, async_client: AsyncOpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = async_client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. 
build_request(options) @@ -1200,12 +1217,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - async def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + async def test_request_timeout(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( + request = async_client._build_request( FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) ) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1220,6 +1237,8 @@ async def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + await client.close() + async def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used async with httpx.AsyncClient(timeout=None) as http_client: @@ -1231,6 +1250,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + await client.close() + # no timeout given to the httpx client should not use the httpx default async with httpx.AsyncClient() as http_client: client = AsyncOpenAI( @@ -1241,6 +1262,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + await client.close() + # explicitly passing the default timeout currently results in it being ignored async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = AsyncOpenAI( @@ -1251,6 +1274,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + await client.close() + def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): with httpx.Client() as http_client: @@ -1261,15 +1286,15 @@ def test_invalid_http_client(self) -> None: http_client=cast(Any, http_client), ) - def test_default_headers_option(self) -> None: - client = AsyncOpenAI( + async def test_default_headers_option(self) -> None: + test_client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = AsyncOpenAI( + test_client2 = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -1278,10 +1303,13 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + await test_client.close() + await test_client2.close() + async def 
test_validate_headers(self) -> None: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) options = await client._prepare_options(FinalRequestOptions(method="get", url="/foo")) @@ -1293,7 +1321,7 @@ async def test_validate_headers(self) -> None: client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 - def test_default_query_option(self) -> None: + async def test_default_query_option(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} ) @@ -1311,8 +1339,10 @@ def test_default_query_option(self) -> None: url = httpx.URL(request.url) assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} - def test_request_extra_json(self) -> None: - request = self.client._build_request( + await client.close() + + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1323,7 +1353,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1334,7 +1364,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1345,8 +1375,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1356,7 +1386,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1367,8 +1397,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1381,7 +1411,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1395,7 +1425,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1438,7 +1468,7 @@ def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - async def test_basic_union_response(self, respx_mock: MockRouter) -> None: + 
async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model1(BaseModel): name: str @@ -1447,12 +1477,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - async def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + async def test_union_response_different_types(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -1463,18 +1493,20 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - async def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + async def test_non_application_json_content_type_for_json_data( + self, respx_mock: MockRouter, async_client: AsyncOpenAI + ) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -1490,11 +1522,11 @@ class Model(BaseModel): ) ) - response = await self.client.get("/foo", cast_to=Model) + response = await async_client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 - def test_base_url_setter(self) -> None: + async def test_base_url_setter(self) -> None: client = AsyncOpenAI( base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True ) @@ -1504,7 +1536,9 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" - def test_base_url_env(self) -> None: + await client.close() + + async def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = AsyncOpenAI(api_key=api_key, _strict_response_validation=True) assert client.base_url == "http://localhost:5000/from/env/" @@ -1524,7 +1558,7 @@ def test_base_url_env(self) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: + async def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1533,6 +1567,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1549,7 +1584,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: + async def 
test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1558,6 +1593,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1574,7 +1610,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_absolute_request_url(self, client: AsyncOpenAI) -> None: + async def test_absolute_request_url(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1583,37 +1619,37 @@ def test_absolute_request_url(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + await client.close() async def test_copied_client_does_not_close_http(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied await asyncio.sleep(0.2) - assert not client.is_closed() + assert not test_client.is_closed() async def test_client_context_manager(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - async with client as c2: - assert c2 is client + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + async with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + async def test_client_response_validation_error(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - await self.client.get("/foo", cast_to=Model) + await async_client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -1624,19 +1660,17 @@ async def test_client_max_retries_validation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + async def test_default_stream_cls(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = await self.client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) + stream = await async_client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) assert isinstance(stream, AsyncStream) await stream.response.aclose() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: class Model(BaseModel): name: str @@ -1648,11 +1682,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): await strict_client.get("/foo", 
cast_to=Model) - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = await client.get("/foo", cast_to=Model) + response = await non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + await strict_client.close() + await non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -1675,13 +1712,12 @@ class Model(BaseModel): ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - @pytest.mark.asyncio - async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + async def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, async_client: AsyncOpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) - calculated = client._calculate_retry_timeout(remaining_retries, options, headers) + calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @@ -1700,7 +1736,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, model="gpt-4o", ).__aenter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(async_client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @@ -1717,12 +1753,11 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, ], model="gpt-4o", ).__aenter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(async_client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio @pytest.mark.parametrize("failure_mode", ["status", "exception"]) async def test_retries_taken( self, @@ -1762,7 +1797,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_omit_retry_count_header( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1795,7 +1829,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_overwrite_retry_count_header( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1828,7 +1861,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) 
@pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_retries_taken_new_response_class( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1884,26 +1916,26 @@ async def test_default_client_creation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects(self, respx_mock: MockRouter) -> None: + async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = await self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None: + async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: - await self.client.post( + await async_client.post( "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response ) From f7e9e9e4f43039f19a41375a6d2b2bdc2264dad7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 16:14:32 +0000 Subject: [PATCH 6/8] chore(internal): grammar fix (it's -> its) --- src/openai/_utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index cddf2c8da4..90494748cc 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -137,7 +137,7 @@ def is_given(obj: _T | NotGiven | Omit) -> TypeGuard[_T]: # Type safe methods for narrowing types with TypeVars. # The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], # however this cause Pyright to rightfully report errors. As we know we don't -# care about the contained types we can safely use `object` in it's place. +# care about the contained types we can safely use `object` in its place. # # There are two separate functions defined, `is_*` and `is_*_t` for different use cases. 
# `is_*` is for when you're dealing with an unknown input From 5b4399219d7ed326411aec524d25ef2b8e3152fc Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 23:07:04 +0000 Subject: [PATCH 7/8] feat(api): Realtime API token_limits, Hybrid searching ranking options --- .stats.yml | 6 +-- src/openai/resources/images.py | 30 +++++++++++--- src/openai/resources/realtime/calls.py | 30 ++++++++++++-- .../resources/vector_stores/file_batches.py | 40 +++++++++++++------ .../types/realtime/call_accept_params.py | 15 ++++++- .../realtime_session_create_request.py | 15 ++++++- .../realtime_session_create_request_param.py | 15 ++++++- .../realtime_session_create_response.py | 15 ++++++- .../realtime_truncation_retention_ratio.py | 26 ++++++++++-- ...altime_truncation_retention_ratio_param.py | 25 ++++++++++-- .../types/responses/file_search_tool.py | 16 +++++++- .../types/responses/file_search_tool_param.py | 16 +++++++- .../types/responses/response_output_text.py | 6 +-- .../responses/response_output_text_param.py | 4 +- src/openai/types/responses/tool.py | 2 + src/openai/types/responses/tool_param.py | 2 + .../vector_stores/file_batch_create_params.py | 40 +++++++++++++++++-- src/openai/types/video.py | 3 ++ .../vector_stores/test_file_batches.py | 26 +++++++----- 19 files changed, 273 insertions(+), 59 deletions(-) diff --git a/.stats.yml b/.stats.yml index bc4e084f99..d59fe71ee4 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 136 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml -openapi_spec_hash: 1560717860bba4105936647dde8f618d -config_hash: 50ee3382a63c021a9f821a935950e926 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml +openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b +config_hash: 032995825500a503a76da119f5354905 diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 9bb332230f..265be6f743 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -168,7 +168,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -282,7 +285,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. 
Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -392,7 +398,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1046,7 +1055,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1160,7 +1172,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1270,7 +1285,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. 
Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py index a8c4761717..7d2c92fe86 100644 --- a/src/openai/resources/realtime/calls.py +++ b/src/openai/resources/realtime/calls.py @@ -195,8 +195,19 @@ def accept( `auto` will create a trace for the session with default values for the workflow name, group id, and metadata. - truncation: Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. extra_headers: Send extra headers @@ -504,8 +515,19 @@ async def accept( `auto` will create a trace for the session with default values for the workflow name, group id, and metadata. - truncation: Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. 
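As a minimal sketch of the truncation control described above (the 0.8 ratio is an illustrative value, not a documented default):

    # Ask the server to keep 80% of post-instruction conversation tokens
    # whenever truncation runs, rather than relying on the default behavior.
    truncation = {"type": "retention_ratio", "retention_ratio": 0.8}

    # The dict is supplied as the `truncation` keyword argument, e.g. to
    # `client.realtime.calls.accept(...)` or to a realtime session create request.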
extra_headers: Send extra headers diff --git a/src/openai/resources/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py index 0f989821de..d31fb59bec 100644 --- a/src/openai/resources/vector_stores/file_batches.py +++ b/src/openai/resources/vector_stores/file_batches.py @@ -52,9 +52,10 @@ def create( self, vector_store_id: str, *, - file_ids: SequenceNotStr[str], attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -66,10 +67,6 @@ def create( Create a vector store file batch. Args: - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. - attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum @@ -79,6 +76,16 @@ def create( chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -94,9 +101,10 @@ def create( f"/vector_stores/{vector_store_id}/file_batches", body=maybe_transform( { - "file_ids": file_ids, "attributes": attributes, "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, }, file_batch_create_params.FileBatchCreateParams, ), @@ -389,9 +397,10 @@ async def create( self, vector_store_id: str, *, - file_ids: SequenceNotStr[str], attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -403,10 +412,6 @@ async def create( Create a vector store file batch. Args: - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. 
- attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum @@ -416,6 +421,16 @@ async def create( chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -431,9 +446,10 @@ async def create( f"/vector_stores/{vector_store_id}/file_batches", body=await async_maybe_transform( { - "file_ids": file_ids, "attributes": attributes, "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, }, file_batch_create_params.FileBatchCreateParams, ), diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py index 0cfb01e7cf..d6fc92b8e5 100644 --- a/src/openai/types/realtime/call_accept_params.py +++ b/src/openai/types/realtime/call_accept_params.py @@ -106,6 +106,17 @@ class CallAcceptParams(TypedDict, total=False): truncation: RealtimeTruncationParam """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py index bc205bd3b5..016ae45b67 100644 --- a/src/openai/types/realtime/realtime_session_create_request.py +++ b/src/openai/types/realtime/realtime_session_create_request.py @@ -106,6 +106,17 @@ class RealtimeSessionCreateRequest(BaseModel): truncation: Optional[RealtimeTruncation] = None """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. 
+ When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py index d1fa2b35d2..8c3998c1ca 100644 --- a/src/openai/types/realtime/realtime_session_create_request_param.py +++ b/src/openai/types/realtime/realtime_session_create_request_param.py @@ -106,6 +106,17 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False): truncation: RealtimeTruncationParam """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py index bb6b94e900..c1336cd6e4 100644 --- a/src/openai/types/realtime/realtime_session_create_response.py +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -459,6 +459,17 @@ class RealtimeSessionCreateResponse(BaseModel): truncation: Optional[RealtimeTruncation] = None """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. 
Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio.py b/src/openai/types/realtime/realtime_truncation_retention_ratio.py index b40427244e..e19ed64831 100644 --- a/src/openai/types/realtime/realtime_truncation_retention_ratio.py +++ b/src/openai/types/realtime/realtime_truncation_retention_ratio.py @@ -1,18 +1,38 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["RealtimeTruncationRetentionRatio"] +__all__ = ["RealtimeTruncationRetentionRatio", "TokenLimits"] + + +class TokenLimits(BaseModel): + post_instructions: Optional[int] = None + """ + Maximum tokens allowed in the conversation after instructions (which including + tool definitions). For example, setting this to 5,000 would mean that truncation + would occur when the conversation exceeds 5,000 tokens after instructions. This + cannot be higher than the model's context window size minus the maximum output + tokens. + """ class RealtimeTruncationRetentionRatio(BaseModel): retention_ratio: float """ - Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the - conversation exceeds the input token limit. + Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when + the conversation exceeds the input token limit. Setting this to `0.8` means that + messages will be dropped until 80% of the maximum allowed tokens are used. This + helps reduce the frequency of truncations and improve cache rates. """ type: Literal["retention_ratio"] """Use retention ratio truncation.""" + + token_limits: Optional[TokenLimits] = None + """Optional custom token limits for this truncation strategy. + + If not provided, the model's default token limits will be used. + """ diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py index b65d65666a..4ea80fe4ce 100644 --- a/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py +++ b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py @@ -4,15 +4,34 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["RealtimeTruncationRetentionRatioParam"] +__all__ = ["RealtimeTruncationRetentionRatioParam", "TokenLimits"] + + +class TokenLimits(TypedDict, total=False): + post_instructions: int + """ + Maximum tokens allowed in the conversation after instructions (which including + tool definitions). For example, setting this to 5,000 would mean that truncation + would occur when the conversation exceeds 5,000 tokens after instructions. This + cannot be higher than the model's context window size minus the maximum output + tokens. 
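A minimal sketch of the new `token_limits` option on the retention-ratio strategy; the 5,000-token limit and 0.8 ratio are illustrative values:

    from openai.types.realtime.realtime_truncation_retention_ratio_param import (
        RealtimeTruncationRetentionRatioParam,
    )

    # Truncate once the post-instruction context exceeds 5,000 tokens, and keep
    # 80% of post-instruction tokens each time truncation runs.
    truncation: RealtimeTruncationRetentionRatioParam = {
        "type": "retention_ratio",
        "retention_ratio": 0.8,
        "token_limits": {"post_instructions": 5_000},
    }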
+ """ class RealtimeTruncationRetentionRatioParam(TypedDict, total=False): retention_ratio: Required[float] """ - Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the - conversation exceeds the input token limit. + Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when + the conversation exceeds the input token limit. Setting this to `0.8` means that + messages will be dropped until 80% of the maximum allowed tokens are used. This + helps reduce the frequency of truncations and improve cache rates. """ type: Required[Literal["retention_ratio"]] """Use retention ratio truncation.""" + + token_limits: TokenLimits + """Optional custom token limits for this truncation strategy. + + If not provided, the model's default token limits will be used. + """ diff --git a/src/openai/types/responses/file_search_tool.py b/src/openai/types/responses/file_search_tool.py index dbdd8cffab..d0d08a323f 100644 --- a/src/openai/types/responses/file_search_tool.py +++ b/src/openai/types/responses/file_search_tool.py @@ -7,12 +7,26 @@ from ..shared.compound_filter import CompoundFilter from ..shared.comparison_filter import ComparisonFilter -__all__ = ["FileSearchTool", "Filters", "RankingOptions"] +__all__ = ["FileSearchTool", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None] +class RankingOptionsHybridSearch(BaseModel): + embedding_weight: float + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: float + """The weight of the text in the reciprocal ranking fusion.""" + + class RankingOptions(BaseModel): + hybrid_search: Optional[RankingOptionsHybridSearch] = None + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. + """ + ranker: Optional[Literal["auto", "default-2024-11-15"]] = None """The ranker to use for the file search.""" diff --git a/src/openai/types/responses/file_search_tool_param.py b/src/openai/types/responses/file_search_tool_param.py index c7641c1b86..b37a669ebd 100644 --- a/src/openai/types/responses/file_search_tool_param.py +++ b/src/openai/types/responses/file_search_tool_param.py @@ -9,12 +9,26 @@ from ..shared_params.compound_filter import CompoundFilter from ..shared_params.comparison_filter import ComparisonFilter -__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"] +__all__ = ["FileSearchToolParam", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] +class RankingOptionsHybridSearch(TypedDict, total=False): + embedding_weight: Required[float] + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: Required[float] + """The weight of the text in the reciprocal ranking fusion.""" + + class RankingOptions(TypedDict, total=False): + hybrid_search: RankingOptionsHybridSearch + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. 
+ """ + ranker: Literal["auto", "default-2024-11-15"] """The ranker to use for the file search.""" diff --git a/src/openai/types/responses/response_output_text.py b/src/openai/types/responses/response_output_text.py index aa97b629f0..fc579cd894 100644 --- a/src/openai/types/responses/response_output_text.py +++ b/src/openai/types/responses/response_output_text.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union, Optional +from typing import List, Union from typing_extensions import Literal, Annotated, TypeAlias from ..._utils import PropertyInfo @@ -108,10 +108,10 @@ class ResponseOutputText(BaseModel): annotations: List[Annotation] """The annotations of the text output.""" + logprobs: List[Logprob] + text: str """The text output from the model.""" type: Literal["output_text"] """The type of the output text. Always `output_text`.""" - - logprobs: Optional[List[Logprob]] = None diff --git a/src/openai/types/responses/response_output_text_param.py b/src/openai/types/responses/response_output_text_param.py index 63d2d394a8..445a308a5b 100644 --- a/src/openai/types/responses/response_output_text_param.py +++ b/src/openai/types/responses/response_output_text_param.py @@ -106,10 +106,10 @@ class ResponseOutputTextParam(TypedDict, total=False): annotations: Required[Iterable[Annotation]] """The annotations of the text output.""" + logprobs: Required[Iterable[Logprob]] + text: Required[str] """The text output from the model.""" type: Required[Literal["output_text"]] """The type of the output text. Always `output_text`.""" - - logprobs: Iterable[Logprob] diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py index 6239b818c9..b29fede0c9 100644 --- a/src/openai/types/responses/tool.py +++ b/src/openai/types/responses/tool.py @@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(BaseModel): file_ids: Optional[List[str]] = None """An optional list of uploaded files to make available to your code.""" + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py index ff4ac2b953..dd1ea0bd54 100644 --- a/src/openai/types/responses/tool_param.py +++ b/src/openai/types/responses/tool_param.py @@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(TypedDict, total=False): file_ids: SequenceNotStr[str] """An optional list of uploaded files to make available to your code.""" + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] + CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] diff --git a/src/openai/types/vector_stores/file_batch_create_params.py b/src/openai/types/vector_stores/file_batch_create_params.py index d8d7b44888..2ab98a83ab 100644 --- a/src/openai/types/vector_stores/file_batch_create_params.py +++ b/src/openai/types/vector_stores/file_batch_create_params.py @@ -2,20 +2,54 @@ from __future__ import annotations -from typing import Dict, Union, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Required, TypedDict from ..._types import SequenceNotStr from ..file_chunking_strategy_param import FileChunkingStrategyParam -__all__ = ["FileBatchCreateParams"] +__all__ = ["FileBatchCreateParams", "File"] class FileBatchCreateParams(TypedDict, total=False): - 
file_ids: Required[SequenceNotStr[str]] + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + + file_ids: SequenceNotStr[str] """ A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + """ + + files: Iterable[File] + """ + A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + """ + + +class File(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access files. """ diff --git a/src/openai/types/video.py b/src/openai/types/video.py index 2c804f75b8..22ee3a11f7 100644 --- a/src/openai/types/video.py +++ b/src/openai/types/video.py @@ -37,6 +37,9 @@ class Video(BaseModel): progress: int """Approximate completion percentage for the generation task.""" + prompt: Optional[str] = None + """The prompt that was used to generate the video.""" + remixed_from_video_id: Optional[str] = None """Identifier of the source video if this video is a remix.""" diff --git a/tests/api_resources/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py index ac678ce912..abbefc20e9 100644 --- a/tests/api_resources/vector_stores/test_file_batches.py +++ b/tests/api_resources/vector_stores/test_file_batches.py @@ -25,7 +25,6 @@ class TestFileBatches: def test_method_create(self, client: OpenAI) -> None: file_batch = client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -33,9 +32,16 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: file_batch = client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], attributes={"foo": "string"}, chunking_strategy={"type": "auto"}, + file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + "chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -43,7 +49,6 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.vector_stores.file_batches.with_raw_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert response.is_closed is True @@ -55,7 +60,6 @@ def test_raw_response_create(self, client: 
OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.vector_stores.file_batches.with_streaming_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -70,7 +74,6 @@ def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): client.vector_stores.file_batches.with_raw_response.create( vector_store_id="", - file_ids=["string"], ) @parametrize @@ -240,7 +243,6 @@ class TestAsyncFileBatches: async def test_method_create(self, async_client: AsyncOpenAI) -> None: file_batch = await async_client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -248,9 +250,16 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: file_batch = await async_client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], attributes={"foo": "string"}, chunking_strategy={"type": "auto"}, + file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + "chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -258,7 +267,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.vector_stores.file_batches.with_raw_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert response.is_closed is True @@ -270,7 +278,6 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.vector_stores.file_batches.with_streaming_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -285,7 +292,6 @@ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): await async_client.vector_stores.file_batches.with_raw_response.create( vector_store_id="", - file_ids=["string"], ) @parametrize From 7512e60d92fd2c42d40c1d606fbb59d231b13f20 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 23:07:37 +0000 Subject: [PATCH 8/8] release: 2.7.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 22 ++++++++++++++++++++++ pyproject.toml | 2 +- src/openai/_version.py | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 2f8909f197..d1328ca9c9 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.6.1" + ".": "2.7.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce541566d..516368cfed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,27 @@ # Changelog +## 2.7.0 (2025-11-03) + +Full Changelog: 
[v2.6.1...v2.7.0](https://github.com/openai/openai-python/compare/v2.6.1...v2.7.0) + +### Features + +* **api:** Realtime API token_limits, Hybrid searching ranking options ([5b43992](https://github.com/openai/openai-python/commit/5b4399219d7ed326411aec524d25ef2b8e3152fc)) +* **api:** remove InputAudio from ResponseInputContent ([bd70a33](https://github.com/openai/openai-python/commit/bd70a33234741fa68c185105e4f53cc0275a2a50)) + + +### Bug Fixes + +* **client:** close streams without requiring full consumption ([d8bb7d6](https://github.com/openai/openai-python/commit/d8bb7d6d728c5481de4198eebe668b67803ae14a)) +* **readme:** update realtime examples ([#2714](https://github.com/openai/openai-python/issues/2714)) ([d0370a8](https://github.com/openai/openai-python/commit/d0370a8d61fc2f710a34d8aad48f649a9683106d)) +* **uploads:** avoid file handle leak ([4f1b691](https://github.com/openai/openai-python/commit/4f1b691ab4db41aebd397ec41942b43fb0f0743c)) + + +### Chores + +* **internal/tests:** avoid race condition with implicit client cleanup ([933d23b](https://github.com/openai/openai-python/commit/933d23bd8d7809c77e0796becfe052167d44d40a)) +* **internal:** grammar fix (it's -> its) ([f7e9e9e](https://github.com/openai/openai-python/commit/f7e9e9e4f43039f19a41375a6d2b2bdc2264dad7)) + ## 2.6.1 (2025-10-24) Full Changelog: [v2.6.0...v2.6.1](https://github.com/openai/openai-python/compare/v2.6.0...v2.6.1) diff --git a/pyproject.toml b/pyproject.toml index e96101b51c..8ff272e18e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "2.6.1" +version = "2.7.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index b0fe817996..0963c9c373 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "2.6.1" # x-release-please-version +__version__ = "2.7.0" # x-release-please-version
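Rounding out the 2.7.0 changes, a minimal sketch of the per-file form of vector store file-batch creation introduced in patch 7 above; the vector store ID, file ID, and attribute values are illustrative:

    from openai import OpenAI

    client = OpenAI()

    # When `files` is used, batch-level `attributes` and `chunking_strategy`
    # are ignored, so they are set per file here. `files` is mutually
    # exclusive with `file_ids`.
    client.vector_stores.file_batches.create(
        vector_store_id="vs_abc123",
        files=[
            {
                "file_id": "file-abc123",
                "attributes": {"team": "docs"},
                "chunking_strategy": {"type": "auto"},
            }
        ],
    )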