From 6c8fcd541e45a83513123b68de60b25cdd623b07 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 18 Jun 2024 19:53:21 +0000
Subject: [PATCH] feat(api): add service tier argument for chat completions
 (#1486)

---
 .stats.yml                                   |  2 +-
 src/openai/_base_client.py                   |  8 ++-
 src/openai/resources/chat/completions.py     | 70 +++++++++++++++++++
 src/openai/types/chat/chat_completion.py     |  7 ++
 .../types/chat/chat_completion_chunk.py      |  7 ++
 .../types/chat/completion_create_params.py   | 13 ++++
 tests/api_resources/chat/test_completions.py |  4 ++
 7 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/.stats.yml b/.stats.yml
index c5ada3b5d..aa7e8427b 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml
diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py
index 5d5d25fca..1c9a1a03f 100644
--- a/src/openai/_base_client.py
+++ b/src/openai/_base_client.py
@@ -457,7 +457,7 @@ def _build_request(
                 raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
 
         headers = self._build_headers(options)
-        params = _merge_mappings(self._custom_query, options.params)
+        params = _merge_mappings(self.default_query, options.params)
         content_type = headers.get("Content-Type")
 
         # If the given Content-Type header is multipart/form-data then it
@@ -593,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]:
             **self._custom_headers,
         }
 
+    @property
+    def default_query(self) -> dict[str, object]:
+        return {
+            **self._custom_query,
+        }
+
     def _validate_headers(
         self,
         headers: Headers,  # noqa: ARG002
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index ed8e9373b..d50bce075 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -59,6 +59,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -163,6 +164,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -236,6 +247,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -346,6 +358,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -412,6 +434,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -522,6 +545,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -587,6 +620,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -620,6 +654,7 @@ def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
@@ -667,6 +702,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -771,6 +807,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -844,6 +890,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -954,6 +1001,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1020,6 +1077,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1130,6 +1188,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1195,6 +1263,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1228,6 +1297,7 @@ async def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py
index 61a94a258..5f4eaf336 100644
--- a/src/openai/types/chat/chat_completion.py
+++ b/src/openai/types/chat/chat_completion.py
@@ -56,6 +56,13 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""
 
+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """This fingerprint represents the backend configuration that the model runs with.
 
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 084a5fcc0..65643c7e6 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -122,6 +122,13 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """
     This fingerprint represents the backend configuration that the model runs with.
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 7dd7067f6..21187f374 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -146,6 +146,19 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """
 
+    service_tier: Optional[Literal["auto", "default"]]
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', the system will utilize scale tier credits until they are
+      exhausted.
+    - If set to 'default', the request will be processed in the shared cluster.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
+
     stop: Union[Optional[str], List[str]]
     """Up to 4 sequences where the API will stop generating further tokens."""
 
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 3099e1681..87df11d1e 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -60,6 +60,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
@@ -176,6 +177,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream_options={"include_usage": True},
             temperature=1,
@@ -294,6 +296,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
@@ -410,6 +413,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream_options={"include_usage": True},
             temperature=1,
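
Usage sketch (not part of the patch): a minimal example of the new argument
against the public client, assuming a valid OPENAI_API_KEY in the environment;
the model name is illustrative only.

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    # Ask for scale-tier processing; with "auto", scale tier credits are used
    # until exhausted, after which requests fall back to the shared cluster.
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
        service_tier="auto",
    )

    # Because service_tier was set on the request, the response reports the
    # tier that actually served it: "scale" or "default".
    print(completion.service_tier)

Design note: the `default_query` property added to `_base_client.py` mirrors
the existing `default_headers` hook. `_build_request` now merges
`self.default_query` instead of reading `self._custom_query` directly, so
client subclasses can override `default_query` to inject query parameters the
same way they already could with headers.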