feat(api): add service tier argument for chat completions (#1486)
stainless-app[bot] committed Jun 18, 2024
1 parent 811f4e7 commit 6c8fcd5
Showing 7 changed files with 109 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 64
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml
8 changes: 7 additions & 1 deletion src/openai/_base_client.py
@@ -457,7 +457,7 @@ def _build_request(
                 raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
 
         headers = self._build_headers(options)
-        params = _merge_mappings(self._custom_query, options.params)
+        params = _merge_mappings(self.default_query, options.params)
        content_type = headers.get("Content-Type")
 
         # If the given Content-Type header is multipart/form-data then it
@@ -593,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]:
             **self._custom_headers,
         }
 
+    @property
+    def default_query(self) -> dict[str, object]:
+        return {
+            **self._custom_query,
+        }
+
     def _validate_headers(
         self,
         headers: Headers,  # noqa: ARG002
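The `_build_request` change routes query-parameter merging through the new `default_query` property instead of reading `self._custom_query` directly, mirroring how `default_headers` already wraps `self._custom_headers`. A minimal sketch of what this enables, assuming the client constructor's existing `default_query` option (the key and value below are placeholders, not part of this diff):

from openai import OpenAI

# Query params supplied at construction time flow through the new
# default_query property and get merged into every request by
# _build_request; per-request params still take precedence.
client = OpenAI(default_query={"example-param": "1"})  # placeholder key/value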
70 changes: 70 additions & 0 deletions src/openai/resources/chat/completions.py
@@ -59,6 +59,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -163,6 +164,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -236,6 +247,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -346,6 +358,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -412,6 +434,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -522,6 +545,16 @@ def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -587,6 +620,7 @@ def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -620,6 +654,7 @@ def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
@@ -667,6 +702,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -771,6 +807,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
@@ -844,6 +890,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -954,6 +1001,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1020,6 +1077,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1130,6 +1188,16 @@ async def create(
               should refer to the `system_fingerprint` response parameter to monitor changes
               in the backend.
 
+          service_tier: Specifies the latency tier to use for processing the request. This parameter is
+              relevant for customers subscribed to the scale tier service:
+
+              - If set to 'auto', the system will utilize scale tier credits until they are
+                exhausted.
+              - If set to 'default', the request will be processed in the shared cluster.
+
+              When this parameter is set, the response body will include the `service_tier`
+              utilized.
+
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1195,6 +1263,7 @@ async def create(
         presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
         stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
         stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1228,6 +1297,7 @@ async def create(
                     "presence_penalty": presence_penalty,
                     "response_format": response_format,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
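With the argument threaded through every `create` overload and into the request body above, a hedged usage sketch (the model name and message content are placeholders, not part of this diff):

from openai import OpenAI

client = OpenAI()

# 'auto' consumes scale tier credits while available; 'default' forces
# the shared cluster, per the docstring added in this commit.
completion = client.chat.completions.create(
    model="gpt-4o",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
    service_tier="auto",
)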
7 changes: 7 additions & 0 deletions src/openai/types/chat/chat_completion.py
@@ -56,6 +56,13 @@ class ChatCompletion(BaseModel):
     object: Literal["chat.completion"]
     """The object type, which is always `chat.completion`."""
 
+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """This fingerprint represents the backend configuration that the model runs with.
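Because the field defaults to `None`, responses to requests that never set `service_tier` deserialize unchanged; when it was set, the tier actually used can be read back. Continuing the sketch above:

# None unless service_tier was sent on the request; otherwise "scale"
# or "default", reporting the tier that actually processed it.
print(completion.service_tier)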
7 changes: 7 additions & 0 deletions src/openai/types/chat/chat_completion_chunk.py
@@ -122,6 +122,13 @@ class ChatCompletionChunk(BaseModel):
     object: Literal["chat.completion.chunk"]
     """The object type, which is always `chat.completion.chunk`."""
 
+    service_tier: Optional[Literal["scale", "default"]] = None
+    """The service tier used for processing the request.
+
+    This field is only included if the `service_tier` parameter is specified in the
+    request.
+    """
+
     system_fingerprint: Optional[str] = None
     """
     This fingerprint represents the backend configuration that the model runs with.
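Streaming responses carry the same optional field on every chunk, so the tier can be observed mid-stream. A sketch under the same placeholder assumptions as before:

stream = client.chat.completions.create(
    model="gpt-4o",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
    service_tier="auto",
    stream=True,
)
for chunk in stream:
    # Same optional service_tier field as the non-streaming model.
    if chunk.service_tier is not None:
        print(chunk.service_tier)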
13 changes: 13 additions & 0 deletions src/openai/types/chat/completion_create_params.py
@@ -146,6 +146,19 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     in the backend.
     """
 
+    service_tier: Optional[Literal["auto", "default"]]
+    """Specifies the latency tier to use for processing the request.
+
+    This parameter is relevant for customers subscribed to the scale tier service:
+
+    - If set to 'auto', the system will utilize scale tier credits until they are
+      exhausted.
+    - If set to 'default', the request will be processed in the shared cluster.
+
+    When this parameter is set, the response body will include the `service_tier`
+    utilized.
+    """
+
     stop: Union[Optional[str], List[str]]
     """Up to 4 sequences where the API will stop generating further tokens."""
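Since `CompletionCreateParamsBase` is a `total=False` TypedDict, the new key stays optional for callers who assemble request payloads as plain dicts. A type-checking sketch, assuming the module also exports its `CompletionCreateParamsNonStreaming` variant (values are placeholders):

from openai.types.chat import completion_create_params

params: completion_create_params.CompletionCreateParamsNonStreaming = {
    "model": "gpt-4o",  # placeholder model
    "messages": [{"role": "user", "content": "Hello"}],
    "service_tier": "auto",  # checked against Literal["auto", "default"]
}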
4 changes: 4 additions & 0 deletions tests/api_resources/chat/test_completions.py
@@ -60,6 +60,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
@@ -176,6 +177,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream_options={"include_usage": True},
             temperature=1,
@@ -294,6 +296,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None:
             presence_penalty=-2,
             response_format={"type": "json_object"},
             seed=-9223372036854776000,
+            service_tier="auto",
             stop="string",
             stream=False,
             stream_options={"include_usage": True},
presence_penalty=-2,
response_format={"type": "json_object"},
seed=-9223372036854776000,
service_tier="auto",
stop="string",
stream_options={"include_usage": True},
temperature=1,
Expand Down
