From bd70a33234741fa68c185105e4f53cc0275a2a50 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 28 Oct 2025 00:54:12 +0000 Subject: [PATCH 1/8] feat(api): remove InputAudio from ResponseInputContent Removes the type `InputAudio` from `ResponseInputContent`. This parameter was non-functional and has now been removed. Please note that this is not a feature removal; it was never supported by the Responses API. While this is technically a backward-incompatible change due to the type removal, it reflects the intended behavior and has no functional impact. --- .stats.yml | 4 ++-- src/openai/types/responses/response_input_content.py | 4 +--- src/openai/types/responses/response_input_content_param.py | 5 +---- .../responses/response_input_message_content_list_param.py | 5 +---- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/.stats.yml b/.stats.yml index b4309cd4c3..bc4e084f99 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 136 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a3c45d9bd3bb25bf4eaa49b7fb473a00038293dec659ffaa44f624ded884abf4.yml -openapi_spec_hash: 9c20aaf786a0700dabd13d9865481c9e +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml +openapi_spec_hash: 1560717860bba4105936647dde8f618d config_hash: 50ee3382a63c021a9f821a935950e926 diff --git a/src/openai/types/responses/response_input_content.py b/src/openai/types/responses/response_input_content.py index 376b9ffce8..1726909a17 100644 --- a/src/openai/types/responses/response_input_content.py +++ b/src/openai/types/responses/response_input_content.py @@ -6,12 +6,10 @@ from ..._utils import PropertyInfo from .response_input_file import ResponseInputFile from .response_input_text import ResponseInputText -from .response_input_audio import ResponseInputAudio from .response_input_image import ResponseInputImage __all__ = ["ResponseInputContent"] ResponseInputContent: TypeAlias = Annotated[ - Union[ResponseInputText, ResponseInputImage, ResponseInputFile, ResponseInputAudio], - PropertyInfo(discriminator="type"), + Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/responses/response_input_content_param.py b/src/openai/types/responses/response_input_content_param.py index a95e026a53..7791cdfd8e 100644 --- a/src/openai/types/responses/response_input_content_param.py +++ b/src/openai/types/responses/response_input_content_param.py @@ -7,11 +7,8 @@ from .response_input_file_param import ResponseInputFileParam from .response_input_text_param import ResponseInputTextParam -from .response_input_audio_param import ResponseInputAudioParam from .response_input_image_param import ResponseInputImageParam __all__ = ["ResponseInputContentParam"] -ResponseInputContentParam: TypeAlias = Union[ - ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam, ResponseInputAudioParam -] +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] diff --git a/src/openai/types/responses/response_input_message_content_list_param.py b/src/openai/types/responses/response_input_message_content_list_param.py index 8e3778d15a..080613df0d 100644 --- a/src/openai/types/responses/response_input_message_content_list_param.py +++ 
b/src/openai/types/responses/response_input_message_content_list_param.py @@ -7,13 +7,10 @@ from .response_input_file_param import ResponseInputFileParam from .response_input_text_param import ResponseInputTextParam -from .response_input_audio_param import ResponseInputAudioParam from .response_input_image_param import ResponseInputImageParam __all__ = ["ResponseInputMessageContentListParam", "ResponseInputContentParam"] -ResponseInputContentParam: TypeAlias = Union[ - ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam, ResponseInputAudioParam -] +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] ResponseInputMessageContentListParam: TypeAlias = List[ResponseInputContentParam] From d8bb7d6d728c5481de4198eebe668b67803ae14a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 10:42:58 +0000 Subject: [PATCH 2/8] fix(client): close streams without requiring full consumption --- src/openai/_streaming.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index f586de74ff..05c284a2be 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -96,9 +96,8 @@ def __stream__(self) -> Iterator[_T]: yield process_data(data=data, cast_to=cast_to, response=response) - # Ensure the entire stream is consumed - for _sse in iterator: - ... + # As we might not fully consume the response stream, we need to close it explicitly + response.close() def __enter__(self) -> Self: return self @@ -198,9 +197,8 @@ async def __stream__(self) -> AsyncIterator[_T]: yield process_data(data=data, cast_to=cast_to, response=response) - # Ensure the entire stream is consumed - async for _sse in iterator: - ... 
+ # As we might not fully consume the response stream, we need to close it explicitly + await response.aclose() async def __aenter__(self) -> Self: return self From d0370a8d61fc2f710a34d8aad48f649a9683106d Mon Sep 17 00:00:00 2001 From: Dan Martins Date: Wed, 29 Oct 2025 08:53:08 -0400 Subject: [PATCH 3/8] fix(readme): update realtime examples (#2714) --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9311b477a3..21f79312a1 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,9 @@ async def main(): client = AsyncOpenAI() async with client.realtime.connect(model="gpt-realtime") as connection: - await connection.session.update(session={'modalities': ['text']}) + await connection.session.update( + session={"type": "realtime", "output_modalities": ["text"]} + ) await connection.conversation.item.create( item={ @@ -256,10 +258,10 @@ async def main(): await connection.response.create() async for event in connection: - if event.type == 'response.text.delta': + if event.type == "response.output_text.delta": print(event.delta, flush=True, end="") - elif event.type == 'response.text.done': + elif event.type == "response.output_text.done": print() elif event.type == "response.done": From 4f1b691ab4db41aebd397ec41942b43fb0f0743c Mon Sep 17 00:00:00 2001 From: Showmick Das Date: Thu, 30 Oct 2025 05:52:29 -0400 Subject: [PATCH 4/8] fix(uploads): avoid file handle leak --- src/openai/resources/uploads/uploads.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py index 8953256f2a..e8c047bd4f 100644 --- a/src/openai/resources/uploads/uploads.py +++ b/src/openai/resources/uploads/uploads.py @@ -157,9 +157,8 @@ def upload_file_chunked( part = self.parts.create(upload_id=upload.id, data=data) log.info("Uploaded part %s for upload %s", part.id, upload.id) part_ids.append(part.id) - except Exception: + finally: buf.close() - raise return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) @@ -465,9 +464,8 @@ async def upload_file_chunked( part = await self.parts.create(upload_id=upload.id, data=data) log.info("Uploaded part %s for upload %s", part.id, upload.id) part_ids.append(part.id) - except Exception: + finally: buf.close() - raise return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5) From 933d23bd8d7809c77e0796becfe052167d44d40a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:05:12 +0000 Subject: [PATCH 5/8] chore(internal/tests): avoid race condition with implicit client cleanup --- tests/test_client.py | 372 +++++++++++++++++++++++-------------------- 1 file changed, 202 insertions(+), 170 deletions(-) diff --git a/tests/test_client.py b/tests/test_client.py index 3287e0e706..e8d62f17f7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -64,51 +64,49 @@ def _get_open_connections(client: OpenAI | AsyncOpenAI) -> int: class TestOpenAI: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - @pytest.mark.respx(base_url=base_url) - def test_raw_response(self, respx_mock: MockRouter) -> None: + def test_raw_response(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert 
response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + def test_raw_response_for_binary(self, respx_mock: MockRouter, client: OpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = self.client.post("/foo", cast_to=httpx.Response) + response = client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, client: OpenAI) -> None: + copied = client.copy() + assert id(copied) != id(client) - copied = self.client.copy(api_key="another My API Key") + copied = client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def test_copy_default_options(self, client: OpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(client.timeout, httpx.Timeout) + copied = client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + assert isinstance(client.timeout, httpx.Timeout) def test_copy_default_headers(self) -> None: client = OpenAI( @@ -143,6 +141,7 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + client.close() def test_copy_default_query(self) -> None: client = OpenAI( @@ -180,13 +179,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + client.close() + + def test_copy_signature(self, client: OpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. 
- self.client.__init__, # type: ignore[misc] + client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -197,12 +198,12 @@ def test_copy_signature(self) -> None: assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") - def test_copy_build_request(self) -> None: + def test_copy_build_request(self, client: OpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. build_request(options) @@ -259,14 +260,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + def test_request_timeout(self, client: OpenAI) -> None: + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( - FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) - ) + request = client._build_request(FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0))) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(100.0) @@ -277,6 +276,8 @@ def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + client.close() + def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used with httpx.Client(timeout=None) as http_client: @@ -288,6 +289,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + client.close() + # no timeout given to the httpx client should not use the httpx default with httpx.Client() as http_client: client = OpenAI( @@ -298,6 +301,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + client.close() + # explicitly passing the default timeout currently results in it being ignored with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = OpenAI( @@ -308,6 +313,8 @@ def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + client.close() + async def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): async with httpx.AsyncClient() as http_client: @@ -319,14 +326,14 @@ async def test_invalid_http_client(self) -> None: ) def test_default_headers_option(self) -> None: - client = OpenAI( + test_client = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = 
client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = OpenAI( + test_client2 = OpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -335,10 +342,13 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + test_client.close() + test_client2.close() + def test_validate_headers(self) -> None: client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) options = client._prepare_options(FinalRequestOptions(method="get", url="/foo")) @@ -369,8 +379,10 @@ def test_default_query_option(self) -> None: url = httpx.URL(request.url) assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} - def test_request_extra_json(self) -> None: - request = self.client._build_request( + client.close() + + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -381,7 +393,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -392,7 +404,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -403,8 +415,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -414,7 +426,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -425,8 +437,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -439,7 +451,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -453,7 +465,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # 
`extra_query` takes priority over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -496,7 +508,7 @@ def test_multipart_repeating_array(self, client: OpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - def test_basic_union_response(self, respx_mock: MockRouter) -> None: + def test_basic_union_response(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model1(BaseModel): name: str @@ -505,12 +517,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + def test_union_response_different_types(self, respx_mock: MockRouter, client: OpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -521,18 +533,18 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter, client: OpenAI) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -548,7 +560,7 @@ class Model(BaseModel): ) ) - response = self.client.get("/foo", cast_to=Model) + response = client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 @@ -560,6 +572,8 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" + client.close() + def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = OpenAI(api_key=api_key, _strict_response_validation=True) @@ -587,6 +601,7 @@ def test_base_url_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -610,6 +625,7 @@ def test_base_url_no_trailing_slash(self, client: OpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + client.close() @pytest.mark.parametrize( "client", @@ -633,35 +649,36 @@ def test_absolute_request_url(self, client: OpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + client.close() def test_copied_client_does_not_close_http(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = 
client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied - assert not client.is_closed() + assert not test_client.is_closed() def test_client_context_manager(self) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - with client as c2: - assert c2 is client + test_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + def test_client_response_validation_error(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - self.client.get("/foo", cast_to=Model) + client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -670,13 +687,13 @@ def test_client_max_retries_validation(self) -> None: OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None)) @pytest.mark.respx(base_url=base_url) - def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + def test_default_stream_cls(self, respx_mock: MockRouter, client: OpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = self.client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) + stream = client.post("/foo", cast_to=Model, stream=True, stream_cls=Stream[Model]) assert isinstance(stream, Stream) stream.response.close() @@ -692,11 +709,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): strict_client.get("/foo", cast_to=Model) - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = client.get("/foo", cast_to=Model) + response = non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + strict_client.close() + non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -719,9 +739,9 @@ class Model(BaseModel): ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = OpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, client: OpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) @@ -743,7 +763,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, clien model="gpt-4o", ).__enter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @@ 
-760,7 +780,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client ], model="gpt-4o", ).__enter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @@ -919,28 +939,26 @@ def test_default_client_creation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - def test_follow_redirects(self, respx_mock: MockRouter) -> None: + def test_follow_redirects(self, respx_mock: MockRouter, client: OpenAI) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + response = client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None: + def test_follow_redirects_disabled(self, respx_mock: MockRouter, client: OpenAI) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: - self.client.post( - "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response - ) + client.post("/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response) assert exc_info.value.response.status_code == 302 assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected" @@ -1003,55 +1021,51 @@ def test_copy_auth(self) -> None: class TestAsyncOpenAI: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response(self, respx_mock: MockRouter) -> None: + async def test_raw_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_raw_response_for_binary(self, respx_mock: MockRouter) -> None: + async def test_raw_response_for_binary(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: respx_mock.post("/foo").mock( return_value=httpx.Response(200, headers={"Content-Type": "application/binary"}, content='{"foo": "bar"}') ) - response = await self.client.post("/foo", cast_to=httpx.Response) + response = await async_client.post("/foo", cast_to=httpx.Response) assert response.status_code == 200 assert isinstance(response, httpx.Response) assert response.json() == {"foo": "bar"} - def test_copy(self) -> None: - copied = self.client.copy() - assert id(copied) != id(self.client) + def test_copy(self, async_client: AsyncOpenAI) -> None: + copied = async_client.copy() + 
assert id(copied) != id(async_client) - copied = self.client.copy(api_key="another My API Key") + copied = async_client.copy(api_key="another My API Key") assert copied.api_key == "another My API Key" - assert self.client.api_key == "My API Key" + assert async_client.api_key == "My API Key" - def test_copy_default_options(self) -> None: + def test_copy_default_options(self, async_client: AsyncOpenAI) -> None: # options that have a default are overridden correctly - copied = self.client.copy(max_retries=7) + copied = async_client.copy(max_retries=7) assert copied.max_retries == 7 - assert self.client.max_retries == 2 + assert async_client.max_retries == 2 copied2 = copied.copy(max_retries=6) assert copied2.max_retries == 6 assert copied.max_retries == 7 # timeout - assert isinstance(self.client.timeout, httpx.Timeout) - copied = self.client.copy(timeout=None) + assert isinstance(async_client.timeout, httpx.Timeout) + copied = async_client.copy(timeout=None) assert copied.timeout is None - assert isinstance(self.client.timeout, httpx.Timeout) + assert isinstance(async_client.timeout, httpx.Timeout) - def test_copy_default_headers(self) -> None: + async def test_copy_default_headers(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) @@ -1084,8 +1098,9 @@ def test_copy_default_headers(self) -> None: match="`default_headers` and `set_default_headers` arguments are mutually exclusive", ): client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) + await client.close() - def test_copy_default_query(self) -> None: + async def test_copy_default_query(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"foo": "bar"} ) @@ -1121,13 +1136,15 @@ def test_copy_default_query(self) -> None: ): client.copy(set_default_query={}, default_query={"foo": "Bar"}) - def test_copy_signature(self) -> None: + await client.close() + + def test_copy_signature(self, async_client: AsyncOpenAI) -> None: # ensure the same parameters that can be passed to the client are defined in the `.copy()` method init_signature = inspect.signature( # mypy doesn't like that we access the `__init__` property. - self.client.__init__, # type: ignore[misc] + async_client.__init__, # type: ignore[misc] ) - copy_signature = inspect.signature(self.client.copy) + copy_signature = inspect.signature(async_client.copy) exclude_params = {"transport", "proxies", "_strict_response_validation"} for name in init_signature.parameters.keys(): @@ -1138,12 +1155,12 @@ def test_copy_signature(self) -> None: assert copy_param is not None, f"copy() signature is missing the {name} param" @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") - def test_copy_build_request(self) -> None: + def test_copy_build_request(self, async_client: AsyncOpenAI) -> None: options = FinalRequestOptions(method="get", url="/foo") def build_request(options: FinalRequestOptions) -> None: - client = self.client.copy() - client._build_request(options) + client_copy = async_client.copy() + client_copy._build_request(options) # ensure that the machinery is warmed up before tracing starts. 
build_request(options) @@ -1200,12 +1217,12 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic print(frame) raise AssertionError() - async def test_request_timeout(self) -> None: - request = self.client._build_request(FinalRequestOptions(method="get", url="/foo")) + async def test_request_timeout(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT - request = self.client._build_request( + request = async_client._build_request( FinalRequestOptions(method="get", url="/foo", timeout=httpx.Timeout(100.0)) ) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1220,6 +1237,8 @@ async def test_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(0) + await client.close() + async def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used async with httpx.AsyncClient(timeout=None) as http_client: @@ -1231,6 +1250,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == httpx.Timeout(None) + await client.close() + # no timeout given to the httpx client should not use the httpx default async with httpx.AsyncClient() as http_client: client = AsyncOpenAI( @@ -1241,6 +1262,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT + await client.close() + # explicitly passing the default timeout currently results in it being ignored async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: client = AsyncOpenAI( @@ -1251,6 +1274,8 @@ async def test_http_client_timeout_option(self) -> None: timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore assert timeout == DEFAULT_TIMEOUT # our default + await client.close() + def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): with httpx.Client() as http_client: @@ -1261,15 +1286,15 @@ def test_invalid_http_client(self) -> None: http_client=cast(Any, http_client), ) - def test_default_headers_option(self) -> None: - client = AsyncOpenAI( + async def test_default_headers_option(self) -> None: + test_client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) - request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = AsyncOpenAI( + test_client2 = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, @@ -1278,10 +1303,13 @@ def test_default_headers_option(self) -> None: "X-Stainless-Lang": "my-overriding-header", }, ) - request = client2._build_request(FinalRequestOptions(method="get", url="/foo")) + request = test_client2._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + await test_client.close() + await test_client2.close() + async def 
test_validate_headers(self) -> None: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) options = await client._prepare_options(FinalRequestOptions(method="get", url="/foo")) @@ -1293,7 +1321,7 @@ async def test_validate_headers(self) -> None: client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 - def test_default_query_option(self) -> None: + async def test_default_query_option(self) -> None: client = AsyncOpenAI( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} ) @@ -1311,8 +1339,10 @@ def test_default_query_option(self) -> None: url = httpx.URL(request.url) assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} - def test_request_extra_json(self) -> None: - request = self.client._build_request( + await client.close() + + def test_request_extra_json(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1323,7 +1353,7 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": False} - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1334,7 +1364,7 @@ def test_request_extra_json(self) -> None: assert data == {"baz": False} # `extra_json` takes priority over `json_data` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1345,8 +1375,8 @@ def test_request_extra_json(self) -> None: data = json.loads(request.content.decode("utf-8")) assert data == {"foo": "bar", "baz": None} - def test_request_extra_headers(self) -> None: - request = self.client._build_request( + def test_request_extra_headers(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1356,7 +1386,7 @@ def test_request_extra_headers(self) -> None: assert request.headers.get("X-Foo") == "Foo" # `extra_headers` takes priority over `default_headers` when keys clash - request = self.client.with_options(default_headers={"X-Bar": "true"})._build_request( + request = client.with_options(default_headers={"X-Bar": "true"})._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1367,8 +1397,8 @@ def test_request_extra_headers(self) -> None: ) assert request.headers.get("X-Bar") == "false" - def test_request_extra_query(self) -> None: - request = self.client._build_request( + def test_request_extra_query(self, client: OpenAI) -> None: + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1381,7 +1411,7 @@ def test_request_extra_query(self) -> None: assert params == {"my_query_param": "Foo"} # if both `query` and `extra_query` are given, they are merged - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1395,7 +1425,7 @@ def test_request_extra_query(self) -> None: assert params == {"bar": "1", "foo": "2"} # `extra_query` takes priority over `query` when keys clash - request = self.client._build_request( + request = client._build_request( FinalRequestOptions( method="post", url="/foo", @@ -1438,7 +1468,7 @@ def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: ] @pytest.mark.respx(base_url=base_url) - async def test_basic_union_response(self, respx_mock: MockRouter) -> None: + 
async def test_basic_union_response(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model1(BaseModel): name: str @@ -1447,12 +1477,12 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" @pytest.mark.respx(base_url=base_url) - async def test_union_response_different_types(self, respx_mock: MockRouter) -> None: + async def test_union_response_different_types(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: """Union of objects with the same field name using a different type""" class Model1(BaseModel): @@ -1463,18 +1493,20 @@ class Model2(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model2) assert response.foo == "bar" respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": 1})) - response = await self.client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) + response = await async_client.get("/foo", cast_to=cast(Any, Union[Model1, Model2])) assert isinstance(response, Model1) assert response.foo == 1 @pytest.mark.respx(base_url=base_url) - async def test_non_application_json_content_type_for_json_data(self, respx_mock: MockRouter) -> None: + async def test_non_application_json_content_type_for_json_data( + self, respx_mock: MockRouter, async_client: AsyncOpenAI + ) -> None: """ Response that sets Content-Type to something other than application/json but returns json data """ @@ -1490,11 +1522,11 @@ class Model(BaseModel): ) ) - response = await self.client.get("/foo", cast_to=Model) + response = await async_client.get("/foo", cast_to=Model) assert isinstance(response, Model) assert response.foo == 2 - def test_base_url_setter(self) -> None: + async def test_base_url_setter(self) -> None: client = AsyncOpenAI( base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True ) @@ -1504,7 +1536,9 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" - def test_base_url_env(self) -> None: + await client.close() + + async def test_base_url_env(self) -> None: with update_env(OPENAI_BASE_URL="http://localhost:5000/from/env"): client = AsyncOpenAI(api_key=api_key, _strict_response_validation=True) assert client.base_url == "http://localhost:5000/from/env/" @@ -1524,7 +1558,7 @@ def test_base_url_env(self) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: + async def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1533,6 +1567,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1549,7 +1584,7 @@ def test_base_url_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: + async def 
test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1558,6 +1593,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "http://localhost:5000/custom/path/foo" + await client.close() @pytest.mark.parametrize( "client", @@ -1574,7 +1610,7 @@ def test_base_url_no_trailing_slash(self, client: AsyncOpenAI) -> None: ], ids=["standard", "custom http client"], ) - def test_absolute_request_url(self, client: AsyncOpenAI) -> None: + async def test_absolute_request_url(self, client: AsyncOpenAI) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1583,37 +1619,37 @@ def test_absolute_request_url(self, client: AsyncOpenAI) -> None: ), ) assert request.url == "https://myapi.com/foo" + await client.close() async def test_copied_client_does_not_close_http(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - assert not client.is_closed() + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + assert not test_client.is_closed() - copied = client.copy() - assert copied is not client + copied = test_client.copy() + assert copied is not test_client del copied await asyncio.sleep(0.2) - assert not client.is_closed() + assert not test_client.is_closed() async def test_client_context_manager(self) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - async with client as c2: - assert c2 is client + test_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) + async with test_client as c2: + assert c2 is test_client assert not c2.is_closed() - assert not client.is_closed() - assert client.is_closed() + assert not test_client.is_closed() + assert test_client.is_closed() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_client_response_validation_error(self, respx_mock: MockRouter) -> None: + async def test_client_response_validation_error(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): foo: str respx_mock.get("/foo").mock(return_value=httpx.Response(200, json={"foo": {"invalid": True}})) with pytest.raises(APIResponseValidationError) as exc: - await self.client.get("/foo", cast_to=Model) + await async_client.get("/foo", cast_to=Model) assert isinstance(exc.value.__cause__, ValidationError) @@ -1624,19 +1660,17 @@ async def test_client_max_retries_validation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio - async def test_default_stream_cls(self, respx_mock: MockRouter) -> None: + async def test_default_stream_cls(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: class Model(BaseModel): name: str respx_mock.post("/foo").mock(return_value=httpx.Response(200, json={"foo": "bar"})) - stream = await self.client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) + stream = await async_client.post("/foo", cast_to=Model, stream=True, stream_cls=AsyncStream[Model]) assert isinstance(stream, AsyncStream) await stream.response.aclose() @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: class Model(BaseModel): name: str @@ -1648,11 +1682,14 @@ class Model(BaseModel): with pytest.raises(APIResponseValidationError): await strict_client.get("/foo", 
cast_to=Model) - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) + non_strict_client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=False) - response = await client.get("/foo", cast_to=Model) + response = await non_strict_client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] + await strict_client.close() + await non_strict_client.close() + @pytest.mark.parametrize( "remaining_retries,retry_after,timeout", [ @@ -1675,13 +1712,12 @@ class Model(BaseModel): ], ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) - @pytest.mark.asyncio - async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) - + async def test_parse_retry_after_header( + self, remaining_retries: int, retry_after: str, timeout: float, async_client: AsyncOpenAI + ) -> None: headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) - calculated = client._calculate_retry_timeout(remaining_retries, options, headers) + calculated = async_client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @@ -1700,7 +1736,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, model="gpt-4o", ).__aenter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(async_client) == 0 @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @@ -1717,12 +1753,11 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, ], model="gpt-4o", ).__aenter__() - assert _get_open_connections(self.client) == 0 + assert _get_open_connections(async_client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio @pytest.mark.parametrize("failure_mode", ["status", "exception"]) async def test_retries_taken( self, @@ -1762,7 +1797,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_omit_retry_count_header( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1795,7 +1829,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_overwrite_retry_count_header( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1828,7 +1861,6 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) 
@pytest.mark.respx(base_url=base_url) - @pytest.mark.asyncio async def test_retries_taken_new_response_class( self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter ) -> None: @@ -1884,26 +1916,26 @@ async def test_default_client_creation(self) -> None: ) @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects(self, respx_mock: MockRouter) -> None: + async def test_follow_redirects(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: # Test that the default follow_redirects=True allows following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) respx_mock.get("/redirected").mock(return_value=httpx.Response(200, json={"status": "ok"})) - response = await self.client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) + response = await async_client.post("/redirect", body={"key": "value"}, cast_to=httpx.Response) assert response.status_code == 200 assert response.json() == {"status": "ok"} @pytest.mark.respx(base_url=base_url) - async def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None: + async def test_follow_redirects_disabled(self, respx_mock: MockRouter, async_client: AsyncOpenAI) -> None: # Test that follow_redirects=False prevents following redirects respx_mock.post("/redirect").mock( return_value=httpx.Response(302, headers={"Location": f"{base_url}/redirected"}) ) with pytest.raises(APIStatusError) as exc_info: - await self.client.post( + await async_client.post( "/redirect", body={"key": "value"}, options={"follow_redirects": False}, cast_to=httpx.Response ) From f7e9e9e4f43039f19a41375a6d2b2bdc2264dad7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 16:14:32 +0000 Subject: [PATCH 6/8] chore(internal): grammar fix (it's -> its) --- src/openai/_utils/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index cddf2c8da4..90494748cc 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -137,7 +137,7 @@ def is_given(obj: _T | NotGiven | Omit) -> TypeGuard[_T]: # Type safe methods for narrowing types with TypeVars. # The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], # however this cause Pyright to rightfully report errors. As we know we don't -# care about the contained types we can safely use `object` in it's place. +# care about the contained types we can safely use `object` in its place. # # There are two separate functions defined, `is_*` and `is_*_t` for different use cases. 
# `is_*` is for when you're dealing with an unknown input From 5b4399219d7ed326411aec524d25ef2b8e3152fc Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 23:07:04 +0000 Subject: [PATCH 7/8] feat(api): Realtime API token_limits, Hybrid searching ranking options --- .stats.yml | 6 +-- src/openai/resources/images.py | 30 +++++++++++--- src/openai/resources/realtime/calls.py | 30 ++++++++++++-- .../resources/vector_stores/file_batches.py | 40 +++++++++++++------ .../types/realtime/call_accept_params.py | 15 ++++++- .../realtime_session_create_request.py | 15 ++++++- .../realtime_session_create_request_param.py | 15 ++++++- .../realtime_session_create_response.py | 15 ++++++- .../realtime_truncation_retention_ratio.py | 26 ++++++++++-- ...altime_truncation_retention_ratio_param.py | 25 ++++++++++-- .../types/responses/file_search_tool.py | 16 +++++++- .../types/responses/file_search_tool_param.py | 16 +++++++- .../types/responses/response_output_text.py | 6 +-- .../responses/response_output_text_param.py | 4 +- src/openai/types/responses/tool.py | 2 + src/openai/types/responses/tool_param.py | 2 + .../vector_stores/file_batch_create_params.py | 40 +++++++++++++++++-- src/openai/types/video.py | 3 ++ .../vector_stores/test_file_batches.py | 26 +++++++----- 19 files changed, 273 insertions(+), 59 deletions(-) diff --git a/.stats.yml b/.stats.yml index bc4e084f99..d59fe71ee4 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 136 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml -openapi_spec_hash: 1560717860bba4105936647dde8f618d -config_hash: 50ee3382a63c021a9f821a935950e926 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml +openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b +config_hash: 032995825500a503a76da119f5354905 diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 9bb332230f..265be6f743 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -168,7 +168,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -282,7 +285,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. 
Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -392,7 +398,10 @@ def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1046,7 +1055,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1160,7 +1172,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, @@ -1270,7 +1285,10 @@ async def edit( If `transparent`, the output format needs to support transparency, so it should be set to either `png` (default value) or `webp`. - input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. 
Supports `high` and `low`. Defaults to `low`. + input_fidelity: Control how much effort the model will exert to match the style and features, + especially facial features, of input images. This parameter is only supported + for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and + `low`. Defaults to `low`. mask: An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. If there are multiple images provided, diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py index a8c4761717..7d2c92fe86 100644 --- a/src/openai/resources/realtime/calls.py +++ b/src/openai/resources/realtime/calls.py @@ -195,8 +195,19 @@ def accept( `auto` will create a trace for the session with default values for the workflow name, group id, and metadata. - truncation: Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. extra_headers: Send extra headers @@ -504,8 +515,19 @@ async def accept( `auto` will create a trace for the session with default values for the workflow name, group id, and metadata. - truncation: Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + truncation: When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. 
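As a minimal sketch of the truncation control described above (the 0.8 ratio is an illustrative value, not a documented default):

    # Ask the server to keep 80% of post-instruction conversation tokens
    # whenever truncation runs, rather than relying on the default behavior.
    truncation = {"type": "retention_ratio", "retention_ratio": 0.8}

    # The dict is supplied as the `truncation` keyword argument, e.g. to
    # `client.realtime.calls.accept(...)` or to a realtime session create request.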
extra_headers: Send extra headers diff --git a/src/openai/resources/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py index 0f989821de..d31fb59bec 100644 --- a/src/openai/resources/vector_stores/file_batches.py +++ b/src/openai/resources/vector_stores/file_batches.py @@ -52,9 +52,10 @@ def create( self, vector_store_id: str, *, - file_ids: SequenceNotStr[str], attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -66,10 +67,6 @@ def create( Create a vector store file batch. Args: - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. - attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum @@ -79,6 +76,16 @@ def create( chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -94,9 +101,10 @@ def create( f"/vector_stores/{vector_store_id}/file_batches", body=maybe_transform( { - "file_ids": file_ids, "attributes": attributes, "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, }, file_batch_create_params.FileBatchCreateParams, ), @@ -389,9 +397,10 @@ async def create( self, vector_store_id: str, *, - file_ids: SequenceNotStr[str], attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit, chunking_strategy: FileChunkingStrategyParam | Omit = omit, + file_ids: SequenceNotStr[str] | Omit = omit, + files: Iterable[file_batch_create_params.File] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -403,10 +412,6 @@ async def create( Create a vector store file batch. Args: - file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. 
- attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum @@ -416,6 +421,16 @@ async def create( chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy. Only applicable if `file_ids` is non-empty. + file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + + files: A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -431,9 +446,10 @@ async def create( f"/vector_stores/{vector_store_id}/file_batches", body=await async_maybe_transform( { - "file_ids": file_ids, "attributes": attributes, "chunking_strategy": chunking_strategy, + "file_ids": file_ids, + "files": files, }, file_batch_create_params.FileBatchCreateParams, ), diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py index 0cfb01e7cf..d6fc92b8e5 100644 --- a/src/openai/types/realtime/call_accept_params.py +++ b/src/openai/types/realtime/call_accept_params.py @@ -106,6 +106,17 @@ class CallAcceptParams(TypedDict, total=False): truncation: RealtimeTruncationParam """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py index bc205bd3b5..016ae45b67 100644 --- a/src/openai/types/realtime/realtime_session_create_request.py +++ b/src/openai/types/realtime/realtime_session_create_request.py @@ -106,6 +106,17 @@ class RealtimeSessionCreateRequest(BaseModel): truncation: Optional[RealtimeTruncation] = None """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. 
+ When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py index d1fa2b35d2..8c3998c1ca 100644 --- a/src/openai/types/realtime/realtime_session_create_request_param.py +++ b/src/openai/types/realtime/realtime_session_create_request_param.py @@ -106,6 +106,17 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False): truncation: RealtimeTruncationParam """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py index bb6b94e900..c1336cd6e4 100644 --- a/src/openai/types/realtime/realtime_session_create_response.py +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -459,6 +459,17 @@ class RealtimeSessionCreateResponse(BaseModel): truncation: Optional[RealtimeTruncation] = None """ - Controls how the realtime conversation is truncated prior to model inference. - The default is `auto`. + When the number of tokens in a conversation exceeds the model's input token + limit, the conversation be truncated, meaning messages (starting from the + oldest) will not be included in the model's context. A 32k context model with + 4,096 max output tokens can only include 28,224 tokens in the context before + truncation occurs. 
Clients can configure truncation behavior to truncate with a + lower max token limit, which is an effective way to control token usage and + cost. Truncation will reduce the number of cached tokens on the next turn + (busting the cache), since messages are dropped from the beginning of the + context. However, clients can also configure truncation to retain messages up to + a fraction of the maximum context size, which will reduce the need for future + truncations and thus improve the cache rate. Truncation can be disabled + entirely, which means the server will never truncate but would instead return an + error if the conversation exceeds the model's input token limit. """ diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio.py b/src/openai/types/realtime/realtime_truncation_retention_ratio.py index b40427244e..e19ed64831 100644 --- a/src/openai/types/realtime/realtime_truncation_retention_ratio.py +++ b/src/openai/types/realtime/realtime_truncation_retention_ratio.py @@ -1,18 +1,38 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["RealtimeTruncationRetentionRatio"] +__all__ = ["RealtimeTruncationRetentionRatio", "TokenLimits"] + + +class TokenLimits(BaseModel): + post_instructions: Optional[int] = None + """ + Maximum tokens allowed in the conversation after instructions (which including + tool definitions). For example, setting this to 5,000 would mean that truncation + would occur when the conversation exceeds 5,000 tokens after instructions. This + cannot be higher than the model's context window size minus the maximum output + tokens. + """ class RealtimeTruncationRetentionRatio(BaseModel): retention_ratio: float """ - Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the - conversation exceeds the input token limit. + Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when + the conversation exceeds the input token limit. Setting this to `0.8` means that + messages will be dropped until 80% of the maximum allowed tokens are used. This + helps reduce the frequency of truncations and improve cache rates. """ type: Literal["retention_ratio"] """Use retention ratio truncation.""" + + token_limits: Optional[TokenLimits] = None + """Optional custom token limits for this truncation strategy. + + If not provided, the model's default token limits will be used. + """ diff --git a/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py index b65d65666a..4ea80fe4ce 100644 --- a/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py +++ b/src/openai/types/realtime/realtime_truncation_retention_ratio_param.py @@ -4,15 +4,34 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["RealtimeTruncationRetentionRatioParam"] +__all__ = ["RealtimeTruncationRetentionRatioParam", "TokenLimits"] + + +class TokenLimits(TypedDict, total=False): + post_instructions: int + """ + Maximum tokens allowed in the conversation after instructions (which including + tool definitions). For example, setting this to 5,000 would mean that truncation + would occur when the conversation exceeds 5,000 tokens after instructions. This + cannot be higher than the model's context window size minus the maximum output + tokens. 
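A minimal sketch of the new `token_limits` option on the retention-ratio strategy; the 5,000-token limit and 0.8 ratio are illustrative values:

    from openai.types.realtime.realtime_truncation_retention_ratio_param import (
        RealtimeTruncationRetentionRatioParam,
    )

    # Truncate once the post-instruction context exceeds 5,000 tokens, and keep
    # 80% of post-instruction tokens each time truncation runs.
    truncation: RealtimeTruncationRetentionRatioParam = {
        "type": "retention_ratio",
        "retention_ratio": 0.8,
        "token_limits": {"post_instructions": 5_000},
    }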
+ """ class RealtimeTruncationRetentionRatioParam(TypedDict, total=False): retention_ratio: Required[float] """ - Fraction of post-instruction conversation tokens to retain (0.0 - 1.0) when the - conversation exceeds the input token limit. + Fraction of post-instruction conversation tokens to retain (`0.0` - `1.0`) when + the conversation exceeds the input token limit. Setting this to `0.8` means that + messages will be dropped until 80% of the maximum allowed tokens are used. This + helps reduce the frequency of truncations and improve cache rates. """ type: Required[Literal["retention_ratio"]] """Use retention ratio truncation.""" + + token_limits: TokenLimits + """Optional custom token limits for this truncation strategy. + + If not provided, the model's default token limits will be used. + """ diff --git a/src/openai/types/responses/file_search_tool.py b/src/openai/types/responses/file_search_tool.py index dbdd8cffab..d0d08a323f 100644 --- a/src/openai/types/responses/file_search_tool.py +++ b/src/openai/types/responses/file_search_tool.py @@ -7,12 +7,26 @@ from ..shared.compound_filter import CompoundFilter from ..shared.comparison_filter import ComparisonFilter -__all__ = ["FileSearchTool", "Filters", "RankingOptions"] +__all__ = ["FileSearchTool", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter, None] +class RankingOptionsHybridSearch(BaseModel): + embedding_weight: float + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: float + """The weight of the text in the reciprocal ranking fusion.""" + + class RankingOptions(BaseModel): + hybrid_search: Optional[RankingOptionsHybridSearch] = None + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. + """ + ranker: Optional[Literal["auto", "default-2024-11-15"]] = None """The ranker to use for the file search.""" diff --git a/src/openai/types/responses/file_search_tool_param.py b/src/openai/types/responses/file_search_tool_param.py index c7641c1b86..b37a669ebd 100644 --- a/src/openai/types/responses/file_search_tool_param.py +++ b/src/openai/types/responses/file_search_tool_param.py @@ -9,12 +9,26 @@ from ..shared_params.compound_filter import CompoundFilter from ..shared_params.comparison_filter import ComparisonFilter -__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"] +__all__ = ["FileSearchToolParam", "Filters", "RankingOptions", "RankingOptionsHybridSearch"] Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] +class RankingOptionsHybridSearch(TypedDict, total=False): + embedding_weight: Required[float] + """The weight of the embedding in the reciprocal ranking fusion.""" + + text_weight: Required[float] + """The weight of the text in the reciprocal ranking fusion.""" + + class RankingOptions(TypedDict, total=False): + hybrid_search: RankingOptionsHybridSearch + """ + Weights that control how reciprocal rank fusion balances semantic embedding + matches versus sparse keyword matches when hybrid search is enabled. 
+ """ + ranker: Literal["auto", "default-2024-11-15"] """The ranker to use for the file search.""" diff --git a/src/openai/types/responses/response_output_text.py b/src/openai/types/responses/response_output_text.py index aa97b629f0..fc579cd894 100644 --- a/src/openai/types/responses/response_output_text.py +++ b/src/openai/types/responses/response_output_text.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union, Optional +from typing import List, Union from typing_extensions import Literal, Annotated, TypeAlias from ..._utils import PropertyInfo @@ -108,10 +108,10 @@ class ResponseOutputText(BaseModel): annotations: List[Annotation] """The annotations of the text output.""" + logprobs: List[Logprob] + text: str """The text output from the model.""" type: Literal["output_text"] """The type of the output text. Always `output_text`.""" - - logprobs: Optional[List[Logprob]] = None diff --git a/src/openai/types/responses/response_output_text_param.py b/src/openai/types/responses/response_output_text_param.py index 63d2d394a8..445a308a5b 100644 --- a/src/openai/types/responses/response_output_text_param.py +++ b/src/openai/types/responses/response_output_text_param.py @@ -106,10 +106,10 @@ class ResponseOutputTextParam(TypedDict, total=False): annotations: Required[Iterable[Annotation]] """The annotations of the text output.""" + logprobs: Required[Iterable[Logprob]] + text: Required[str] """The text output from the model.""" type: Required[Literal["output_text"]] """The type of the output text. Always `output_text`.""" - - logprobs: Iterable[Logprob] diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py index 6239b818c9..b29fede0c9 100644 --- a/src/openai/types/responses/tool.py +++ b/src/openai/types/responses/tool.py @@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(BaseModel): file_ids: Optional[List[str]] = None """An optional list of uploaded files to make available to your code.""" + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py index ff4ac2b953..dd1ea0bd54 100644 --- a/src/openai/types/responses/tool_param.py +++ b/src/openai/types/responses/tool_param.py @@ -161,6 +161,8 @@ class CodeInterpreterContainerCodeInterpreterToolAuto(TypedDict, total=False): file_ids: SequenceNotStr[str] """An optional list of uploaded files to make available to your code.""" + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] + CodeInterpreterContainer: TypeAlias = Union[str, CodeInterpreterContainerCodeInterpreterToolAuto] diff --git a/src/openai/types/vector_stores/file_batch_create_params.py b/src/openai/types/vector_stores/file_batch_create_params.py index d8d7b44888..2ab98a83ab 100644 --- a/src/openai/types/vector_stores/file_batch_create_params.py +++ b/src/openai/types/vector_stores/file_batch_create_params.py @@ -2,20 +2,54 @@ from __future__ import annotations -from typing import Dict, Union, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Required, TypedDict from ..._types import SequenceNotStr from ..file_chunking_strategy_param import FileChunkingStrategyParam -__all__ = ["FileBatchCreateParams"] +__all__ = ["FileBatchCreateParams", "File"] class FileBatchCreateParams(TypedDict, total=False): - 
file_ids: Required[SequenceNotStr[str]] + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + + file_ids: SequenceNotStr[str] """ A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access + files. If `attributes` or `chunking_strategy` are provided, they will be applied + to all files in the batch. Mutually exclusive with `files`. + """ + + files: Iterable[File] + """ + A list of objects that each include a `file_id` plus optional `attributes` or + `chunking_strategy`. Use this when you need to override metadata for specific + files. The global `attributes` or `chunking_strategy` will be ignored and must + be specified for each file. Mutually exclusive with `file_ids`. + """ + + +class File(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access files. """ diff --git a/src/openai/types/video.py b/src/openai/types/video.py index 2c804f75b8..22ee3a11f7 100644 --- a/src/openai/types/video.py +++ b/src/openai/types/video.py @@ -37,6 +37,9 @@ class Video(BaseModel): progress: int """Approximate completion percentage for the generation task.""" + prompt: Optional[str] = None + """The prompt that was used to generate the video.""" + remixed_from_video_id: Optional[str] = None """Identifier of the source video if this video is a remix.""" diff --git a/tests/api_resources/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py index ac678ce912..abbefc20e9 100644 --- a/tests/api_resources/vector_stores/test_file_batches.py +++ b/tests/api_resources/vector_stores/test_file_batches.py @@ -25,7 +25,6 @@ class TestFileBatches: def test_method_create(self, client: OpenAI) -> None: file_batch = client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -33,9 +32,16 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: file_batch = client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], attributes={"foo": "string"}, chunking_strategy={"type": "auto"}, + file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + "chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -43,7 +49,6 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.vector_stores.file_batches.with_raw_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert response.is_closed is True @@ -55,7 +60,6 @@ def test_raw_response_create(self, client: 
OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.vector_stores.file_batches.with_streaming_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -70,7 +74,6 @@ def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): client.vector_stores.file_batches.with_raw_response.create( vector_store_id="", - file_ids=["string"], ) @parametrize @@ -240,7 +243,6 @@ class TestAsyncFileBatches: async def test_method_create(self, async_client: AsyncOpenAI) -> None: file_batch = await async_client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -248,9 +250,16 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: file_batch = await async_client.vector_stores.file_batches.create( vector_store_id="vs_abc123", - file_ids=["string"], attributes={"foo": "string"}, chunking_strategy={"type": "auto"}, + file_ids=["string"], + files=[ + { + "file_id": "file_id", + "attributes": {"foo": "string"}, + "chunking_strategy": {"type": "auto"}, + } + ], ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @@ -258,7 +267,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.vector_stores.file_batches.with_raw_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) assert response.is_closed is True @@ -270,7 +278,6 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.vector_stores.file_batches.with_streaming_response.create( vector_store_id="vs_abc123", - file_ids=["string"], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -285,7 +292,6 @@ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): await async_client.vector_stores.file_batches.with_raw_response.create( vector_store_id="", - file_ids=["string"], ) @parametrize From 7512e60d92fd2c42d40c1d606fbb59d231b13f20 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 23:07:37 +0000 Subject: [PATCH 8/8] release: 2.7.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 22 ++++++++++++++++++++++ pyproject.toml | 2 +- src/openai/_version.py | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 2f8909f197..d1328ca9c9 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.6.1" + ".": "2.7.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce541566d..516368cfed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,27 @@ # Changelog +## 2.7.0 (2025-11-03) + +Full Changelog: 
[v2.6.1...v2.7.0](https://github.com/openai/openai-python/compare/v2.6.1...v2.7.0) + +### Features + +* **api:** Realtime API token_limits, Hybrid searching ranking options ([5b43992](https://github.com/openai/openai-python/commit/5b4399219d7ed326411aec524d25ef2b8e3152fc)) +* **api:** remove InputAudio from ResponseInputContent ([bd70a33](https://github.com/openai/openai-python/commit/bd70a33234741fa68c185105e4f53cc0275a2a50)) + + +### Bug Fixes + +* **client:** close streams without requiring full consumption ([d8bb7d6](https://github.com/openai/openai-python/commit/d8bb7d6d728c5481de4198eebe668b67803ae14a)) +* **readme:** update realtime examples ([#2714](https://github.com/openai/openai-python/issues/2714)) ([d0370a8](https://github.com/openai/openai-python/commit/d0370a8d61fc2f710a34d8aad48f649a9683106d)) +* **uploads:** avoid file handle leak ([4f1b691](https://github.com/openai/openai-python/commit/4f1b691ab4db41aebd397ec41942b43fb0f0743c)) + + +### Chores + +* **internal/tests:** avoid race condition with implicit client cleanup ([933d23b](https://github.com/openai/openai-python/commit/933d23bd8d7809c77e0796becfe052167d44d40a)) +* **internal:** grammar fix (it's -> its) ([f7e9e9e](https://github.com/openai/openai-python/commit/f7e9e9e4f43039f19a41375a6d2b2bdc2264dad7)) + ## 2.6.1 (2025-10-24) Full Changelog: [v2.6.0...v2.6.1](https://github.com/openai/openai-python/compare/v2.6.0...v2.6.1) diff --git a/pyproject.toml b/pyproject.toml index e96101b51c..8ff272e18e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "2.6.1" +version = "2.7.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index b0fe817996..0963c9c373 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "2.6.1" # x-release-please-version +__version__ = "2.7.0" # x-release-please-version
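Rounding out the 2.7.0 changes, a minimal sketch of the per-file form of vector store file-batch creation introduced in patch 7 above; the vector store ID, file ID, and attribute values are illustrative:

    from openai import OpenAI

    client = OpenAI()

    # When `files` is used, batch-level `attributes` and `chunking_strategy`
    # are ignored, so they are set per file here. `files` is mutually
    # exclusive with `file_ids`.
    client.vector_stores.file_batches.create(
        vector_store_id="vs_abc123",
        files=[
            {
                "file_id": "file-abc123",
                "attributes": {"team": "docs"},
                "chunking_strategy": {"type": "auto"},
            }
        ],
    )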