diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py
index 7cbf54ccf..12455f6d0 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py
@@ -179,7 +179,8 @@ def get_output_text(
 def refine_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]:
     """
     Refines the completion params for the HF image to text api. Removes any unsupported params.
-    The supported keys were found by looking at the HF ImageToTextPipeline.__call__ method
+    The supported keys were found by looking at the HF ImageToTextPipeline.__call__ method:
+    https://github.com/huggingface/transformers/blob/cbbe30749b425f7c327acdab11473b33567a6e26/src/transformers/pipelines/image_to_text.py#L83
     """
     supported_keys = {
         "max_new_tokens",
diff --git a/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceAutomaticSpeechRecognitionPromptSchema.ts b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceAutomaticSpeechRecognitionPromptSchema.ts
index 3af87d6dd..0650678b3 100644
--- a/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceAutomaticSpeechRecognitionPromptSchema.ts
+++ b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceAutomaticSpeechRecognitionPromptSchema.ts
@@ -15,7 +15,14 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = {
         items: {
           type: "attachment",
           required: ["data"],
-          mime_types: ["audio/mpeg", "audio/wav", "audio/webm", "audio/flac", "audio/ogg", "audio/ogg"],
+          mime_types: [
+            "audio/mpeg",
+            "audio/wav",
+            "audio/webm",
+            "audio/flac",
+            "audio/ogg",
+            "audio/ogg",
+          ],
           properties: {
             data: {
               type: "string",
@@ -45,10 +52,10 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = {
          bits at the end to make the final reconstitution as perfect as possible.
          Defaults to defaults to chunk_length_s / 6`,
       },
-      device:{
+      device: {
         type: "string",
         enum: ["cuda", "mps", "cpu"],
-        description: `The device to load the pipeline to. Mps backend not supported for all models.`
+        description: `The device to load the pipeline to. Mps backend not supported for all models.`,
       },
       framework: {
         type: "string",
@@ -65,15 +72,12 @@ export const HuggingFaceAutomaticSpeechRecognitionPromptSchema: PromptSchema = {
       return_timestamps: {
         type: "string",
         enum: ["word", "char", "True", ""],
-        description: `Only available for pure CTC models (Wav2Vec2, HuBERT, etc) and the Whisper model. Not available for other sequence-to-sequence models.`
+        description: `Only available for pure CTC models (Wav2Vec2, HuBERT, etc) and the Whisper model. Not available for other sequence-to-sequence models.`,
       },
       max_new_tokens: {
-        type: "number",
-        description: `The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt`
-      }
+        type: "integer",
+        description: `The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt`,
+      },
     },
   },
 };
-
-
-
diff --git a/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceImage2TextTransformerPromptSchema.ts b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceImage2TextTransformerPromptSchema.ts
index ea0a55ab4..337efd291 100644
--- a/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceImage2TextTransformerPromptSchema.ts
+++ b/python/src/aiconfig/editor/client/src/shared/prompt_schemas/HuggingFaceImage2TextTransformerPromptSchema.ts
@@ -1,6 +1,10 @@
 import { PromptSchema } from "../../utils/promptUtils";
 
 export const HuggingFaceImage2TextTransformerPromptSchema: PromptSchema = {
+  // See https://github.com/huggingface/transformers/blob/cbbe30749b425f7c327acdab11473b33567a6e26/src/transformers/pipelines/image_to_text.py#L83
+  // for settings and defaults. The settings below are the ones that the
+  // HuggingFaceImage2TextTransformer refine_completion_params
+  // implementation keeps as supported.
   input: {
     type: "object",
     required: ["data"],
@@ -28,6 +32,16 @@ export const HuggingFaceImage2TextTransformerPromptSchema: PromptSchema = {
         type: "string",
         description: `Hugging Face model to use`,
       },
+      max_new_tokens: {
+        type: "integer",
+        description: `The amount of maximum tokens to generate. 
+        By default it will use \`generate\` default.`,
+      },
+      timeout: {
+        type: "number",
+        description: `The maximum time in seconds to wait for fetching images 
+        from the web. If None, no timeout is set and the call may block forever.`,
+      },
     },
   },
 };