Merged
2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "2.6.1"
".": "2.7.0"
}
6 changes: 3 additions & 3 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 136
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a3c45d9bd3bb25bf4eaa49b7fb473a00038293dec659ffaa44f624ded884abf4.yml
openapi_spec_hash: 9c20aaf786a0700dabd13d9865481c9e
config_hash: 50ee3382a63c021a9f821a935950e926
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
config_hash: 032995825500a503a76da119f5354905
22 changes: 22 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,27 @@
# Changelog

## 2.7.0 (2025-11-03)

Full Changelog: [v2.6.1...v2.7.0](https://github.com/openai/openai-python/compare/v2.6.1...v2.7.0)

### Features

* **api:** Realtime API token_limits, Hybrid searching ranking options ([5b43992](https://github.com/openai/openai-python/commit/5b4399219d7ed326411aec524d25ef2b8e3152fc))
* **api:** remove InputAudio from ResponseInputContent ([bd70a33](https://github.com/openai/openai-python/commit/bd70a33234741fa68c185105e4f53cc0275a2a50))


### Bug Fixes

* **client:** close streams without requiring full consumption ([d8bb7d6](https://github.com/openai/openai-python/commit/d8bb7d6d728c5481de4198eebe668b67803ae14a))
* **readme:** update realtime examples ([#2714](https://github.com/openai/openai-python/issues/2714)) ([d0370a8](https://github.com/openai/openai-python/commit/d0370a8d61fc2f710a34d8aad48f649a9683106d))
* **uploads:** avoid file handle leak ([4f1b691](https://github.com/openai/openai-python/commit/4f1b691ab4db41aebd397ec41942b43fb0f0743c))


### Chores

* **internal/tests:** avoid race condition with implicit client cleanup ([933d23b](https://github.com/openai/openai-python/commit/933d23bd8d7809c77e0796becfe052167d44d40a))
* **internal:** grammar fix (it's -> its) ([f7e9e9e](https://github.com/openai/openai-python/commit/f7e9e9e4f43039f19a41375a6d2b2bdc2264dad7))

## 2.6.1 (2025-10-24)

Full Changelog: [v2.6.0...v2.6.1](https://github.com/openai/openai-python/compare/v2.6.0...v2.6.1)
8 changes: 5 additions & 3 deletions README.md
@@ -244,7 +244,9 @@ async def main():
client = AsyncOpenAI()

async with client.realtime.connect(model="gpt-realtime") as connection:
await connection.session.update(session={'modalities': ['text']})
await connection.session.update(
session={"type": "realtime", "output_modalities": ["text"]}
)

await connection.conversation.item.create(
item={
@@ -256,10 +258,10 @@
await connection.response.create()

async for event in connection:
if event.type == 'response.text.delta':
if event.type == "response.output_text.delta":
print(event.delta, flush=True, end="")

elif event.type == 'response.text.done':
elif event.type == "response.output_text.done":
print()

elif event.type == "response.done":
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
version = "2.6.1"
version = "2.7.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
10 changes: 4 additions & 6 deletions src/openai/_streaming.py
@@ -96,9 +96,8 @@ def __stream__(self) -> Iterator[_T]:

yield process_data(data=data, cast_to=cast_to, response=response)

# Ensure the entire stream is consumed
for _sse in iterator:
...
# As we might not fully consume the response stream, we need to close it explicitly
response.close()

def __enter__(self) -> Self:
return self
@@ -198,9 +197,8 @@ async def __stream__(self) -> AsyncIterator[_T]:

yield process_data(data=data, cast_to=cast_to, response=response)

# Ensure the entire stream is consumed
async for _sse in iterator:
...
# As we might not fully consume the response stream, we need to close it explicitly
await response.aclose()

async def __aenter__(self) -> Self:
return self
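The `_streaming.py` change replaces draining the SSE iterator with an explicit `response.close()`, so the connection is released even when the consumer stops early. A minimal sketch of the pattern; the names `FakeResponse` and `sse_stream` are illustrative stand-ins, not SDK code:

```python
from typing import Iterator


class FakeResponse:
    """Stand-in for an HTTP response object; records whether close() was called."""

    def __init__(self) -> None:
        self.closed = False

    def close(self) -> None:
        self.closed = True


def sse_stream(response: FakeResponse, events: list[str]) -> Iterator[str]:
    try:
        for event in events:
            if event == "[DONE]":
                break
            yield event
    finally:
        # Close explicitly: the consumer may abandon the stream early,
        # so we cannot rely on iterating to exhaustion to release it.
        response.close()


response = FakeResponse()
stream = sse_stream(response, ["a", "b", "[DONE]"])
first = next(stream)
stream.close()  # abandon early; the finally block still closes the response
```

Closing a suspended generator raises `GeneratorExit` at the paused `yield`, which runs the `finally` block, so the response is closed without consuming the remaining events.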
2 changes: 1 addition & 1 deletion src/openai/_utils/_utils.py
@@ -137,7 +137,7 @@ def is_given(obj: _T | NotGiven | Omit) -> TypeGuard[_T]:
# Type safe methods for narrowing types with TypeVars.
# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown],
# however this cause Pyright to rightfully report errors. As we know we don't
# care about the contained types we can safely use `object` in it's place.
# care about the contained types we can safely use `object` in its place.
#
# There are two separate functions defined, `is_*` and `is_*_t` for different use cases.
# `is_*` is for when you're dealing with an unknown input
2 changes: 1 addition & 1 deletion src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "openai"
__version__ = "2.6.1" # x-release-please-version
__version__ = "2.7.0" # x-release-please-version
30 changes: 24 additions & 6 deletions src/openai/resources/images.py
@@ -168,7 +168,10 @@ def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -282,7 +285,10 @@ def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -392,7 +398,10 @@ def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1046,7 +1055,10 @@ async def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1160,7 +1172,10 @@ async def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
@@ -1270,7 +1285,10 @@ async def edit(
If `transparent`, the output format needs to support transparency, so it should
be set to either `png` (default value) or `webp`.

input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
input_fidelity: Control how much effort the model will exert to match the style and features,
especially facial features, of input images. This parameter is only supported
for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
`low`. Defaults to `low`.

mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
indicate where `image` should be edited. If there are multiple images provided,
30 changes: 26 additions & 4 deletions src/openai/resources/realtime/calls.py
@@ -195,8 +195,19 @@ def accept(
`auto` will create a trace for the session with default values for the workflow
name, group id, and metadata.

truncation: Controls how the realtime conversation is truncated prior to model inference.
The default is `auto`.
truncation: When the number of tokens in a conversation exceeds the model's input token
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
truncation occurs. Clients can configure truncation behavior to truncate with a
lower max token limit, which is an effective way to control token usage and
cost. Truncation will reduce the number of cached tokens on the next turn
(busting the cache), since messages are dropped from the beginning of the
context. However, clients can also configure truncation to retain messages up to
a fraction of the maximum context size, which will reduce the need for future
truncations and thus improve the cache rate. Truncation can be disabled
entirely, which means the server will never truncate but would instead return an
error if the conversation exceeds the model's input token limit.

extra_headers: Send extra headers

@@ -504,8 +515,19 @@ async def accept(
`auto` will create a trace for the session with default values for the workflow
name, group id, and metadata.

truncation: Controls how the realtime conversation is truncated prior to model inference.
The default is `auto`.
truncation: When the number of tokens in a conversation exceeds the model's input token
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
truncation occurs. Clients can configure truncation behavior to truncate with a
lower max token limit, which is an effective way to control token usage and
cost. Truncation will reduce the number of cached tokens on the next turn
(busting the cache), since messages are dropped from the beginning of the
context. However, clients can also configure truncation to retain messages up to
a fraction of the maximum context size, which will reduce the need for future
truncations and thus improve the cache rate. Truncation can be disabled
entirely, which means the server will never truncate but would instead return an
error if the conversation exceeds the model's input token limit.

extra_headers: Send extra headers

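The retention-fraction behavior described in the new `truncation` docstring can be sketched as follows. This is a hypothetical illustration of the policy (drop oldest messages; when truncating, cut down to a fraction of the limit so the next turns don't immediately re-truncate), not SDK or server code, and every name here is invented:

```python
def truncate_conversation(
    messages: list[str],
    token_counts: list[int],
    max_input_tokens: int,
    retention_fraction: float = 1.0,
) -> list[str]:
    """Drop the oldest messages once the total exceeds max_input_tokens.

    When truncation triggers, keep only retention_fraction of the limit,
    so subsequent turns are less likely to re-trigger truncation (which
    would drop cached messages from the front and bust the prompt cache).
    """
    total = sum(token_counts)
    if total <= max_input_tokens:
        return messages  # under the limit: nothing to do
    target = int(max_input_tokens * retention_fraction)
    dropped = 0
    # Messages are dropped starting from the oldest (front of the list).
    while total > target and dropped < len(messages):
        total -= token_counts[dropped]
        dropped += 1
    return messages[dropped:]


kept = truncate_conversation(["m1", "m2", "m3", "m4"], [10, 10, 10, 10], 25, 0.5)
```

With a 25-token limit and a 0.5 retention fraction, a 40-token conversation is cut down to at most 12 tokens, leaving only the newest message.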
6 changes: 2 additions & 4 deletions src/openai/resources/uploads/uploads.py
@@ -157,9 +157,8 @@ def upload_file_chunked(
part = self.parts.create(upload_id=upload.id, data=data)
log.info("Uploaded part %s for upload %s", part.id, upload.id)
part_ids.append(part.id)
except Exception:
finally:
buf.close()
raise

return self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)

@@ -465,9 +464,8 @@ async def upload_file_chunked(
part = await self.parts.create(upload_id=upload.id, data=data)
log.info("Uploaded part %s for upload %s", part.id, upload.id)
part_ids.append(part.id)
except Exception:
finally:
buf.close()
raise

return await self.complete(upload_id=upload.id, part_ids=part_ids, md5=md5)
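The `uploads.py` fix swaps `except Exception: buf.close(); raise` for `finally: buf.close()`, so the buffer is closed on the success path too, not only on error. A self-contained sketch of the same pattern; the names (`upload_chunks`, `create_part`) are illustrative, not the SDK's:

```python
import io
from typing import BinaryIO, Callable


def upload_chunks(
    buf: BinaryIO, chunk_size: int, create_part: Callable[[bytes], str]
) -> list[str]:
    part_ids: list[str] = []
    try:
        while True:
            data = buf.read(chunk_size)
            if not data:
                break
            part_ids.append(create_part(data))
    finally:
        # `finally` runs on success, on error, and on early return, so the
        # handle is never leaked (the old `except ... raise` only covered errors).
        buf.close()
    return part_ids


buf = io.BytesIO(b"abcdef")
ids = upload_chunks(buf, 4, lambda data: f"part-{len(data)}")
```

After the call, `buf.closed` is `True` whether or not an exception occurred while creating parts.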
