-
Notifications
You must be signed in to change notification settings - Fork 69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add model setting completion params for Image2Text prompt schema #875
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,14 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = { | |
items: { | ||
type: "attachment", | ||
required: ["data"], | ||
mime_types: ["audio/mpeg", "audio/wav", "audio/webm", "audio/flac", "audio/ogg", "audio/ogg"], | ||
mime_types: [ | ||
"audio/mpeg", | ||
"audio/wav", | ||
"audio/webm", | ||
"audio/flac", | ||
"audio/ogg", | ||
"audio/ogg", | ||
], | ||
properties: { | ||
data: { | ||
type: "string", | ||
|
@@ -45,10 +52,10 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = { | |
bits at the end to make the final reconstitution as perfect as possible. | ||
Defaults to defaults to chunk_length_s / 6`, | ||
}, | ||
device:{ | ||
device: { | ||
type: "string", | ||
enum: ["cuda", "mps", "cpu"], | ||
description: `The device to load the pipeline to. Mps backend not supported for all models.` | ||
description: `The device to load the pipeline to. Mps backend not supported for all models.`, | ||
}, | ||
framework: { | ||
type: "string", | ||
|
@@ -65,15 +72,12 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = { | |
return_timestamps: { | ||
type: "string", | ||
enum: ["word", "char", "True", ""], | ||
description: `Only available for pure CTC models (Wav2Vec2, HuBERT, etc) and the Whisper model. Not available for other sequence-to-sequence models.` | ||
description: `Only available for pure CTC models (Wav2Vec2, HuBERT, etc) and the Whisper model. Not available for other sequence-to-sequence models.`, | ||
}, | ||
max_new_tokens: { | ||
type: "number", | ||
description: `The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt` | ||
} | ||
type: "integer", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why this change? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because `number` is a float while `integer` is an integer value. I think this was incorrectly transcribed the first time. |
||
description: `The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt`, | ||
}, | ||
}, | ||
}, | ||
}; | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
import { PromptSchema } from "../../utils/promptUtils"; | ||
|
||
export const HuggingFaceImage2TextTransformerPromptSchema: PromptSchema = { | ||
// See https://github.com/huggingface/transformers/blob/cbbe30749b425f7c327acdab11473b33567a6e26/src/transformers/pipelines/image_to_text.py#L83 | ||
// for settings and defaults. The settings below are supported settings | ||
// specified in the HuggingFaceImage2TextTransformer | ||
// refine_completion_params implementation. | ||
input: { | ||
type: "object", | ||
required: ["data"], | ||
|
@@ -28,6 +32,16 @@ export const HuggingFaceImage2TextTransformerPromptSchema: PromptSchema = { | |
type: "string", | ||
description: `Hugging Face model to use`, | ||
}, | ||
max_new_tokens: { | ||
type: "integer", | ||
description: `The amount of maximum tokens to generate. | ||
By default it will use \`generate\` default.`, | ||
}, | ||
timeout: { | ||
type: "number", | ||
description: `The maximum time in seconds to wait for fetching images | ||
from the web. If None, no timeout is set and the call may block forever.`, | ||
}, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh wow, that's a short list of possible params. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, it's actually pretty wild. It's why I left a comment about it in the summary for #855. |
||
}, | ||
}, | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks duplicated. Error?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just auto-formatting when I hit the CMD + S