From 0e9c8cd2589c1292d4fb789d2a5cca2a7e8aad26 Mon Sep 17 00:00:00 2001 From: "Rossdan Craig rossdan@lastmileai.dev" <> Date: Wed, 10 Jan 2024 14:42:05 -0500 Subject: [PATCH] HF transformers: Small fixes nits Small fixes from comments from Sarmad + me from these diffs: - https://github.com/lastmile-ai/aiconfig/pull/854 - https://github.com/lastmile-ai/aiconfig/pull/855 - https://github.com/lastmile-ai/aiconfig/pull/821 Main things I did - rename `refine_chat_completion_params` --> `refine_completion_params` - edit `get_output_text` to not check for `OutputDataWithValue` - sorted the init file to be alphabetical - fixed some typos/print statements - made some error messages a bit more intuitive with prompt name - sorted some imports - fixed old class name `HuggingFaceAutomaticSpeechRecognition` --> `HuggingFaceAutomaticSpeechRecognitionTransformer` ## Test Plan These are all small nits and shouldn't change functionality --- .../__init__.py | 9 +++--- .../automatic_speech_recognition.py | 22 +++++++------ .../local_inference/image_2_text.py | 32 ++++++++++--------- .../local_inference/text_2_image.py | 16 +++++++--- .../local_inference/text_2_speech.py | 12 +++---- .../local_inference/text_generation.py | 15 ++++----- .../local_inference/text_summarization.py | 14 +++----- .../local_inference/text_translation.py | 14 +++----- .../text_generation.py | 17 ++++------ 9 files changed, 72 insertions(+), 79 deletions(-) diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/__init__.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/__init__.py index 0a0aae1cc..70f811527 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/__init__.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/__init__.py @@ -1,3 +1,4 @@ +from .local_inference.automatic_speech_recognition import HuggingFaceAutomaticSpeechRecognitionTransformer from .local_inference.image_2_text import 
HuggingFaceImage2TextTransformer from .local_inference.text_2_image import HuggingFaceText2ImageDiffusor from .local_inference.text_2_speech import HuggingFaceText2SpeechTransformer @@ -5,18 +6,16 @@ from .local_inference.text_summarization import HuggingFaceTextSummarizationTransformer from .local_inference.text_translation import HuggingFaceTextTranslationTransformer from .remote_inference_client.text_generation import HuggingFaceTextGenerationParser -from .local_inference.automatic_speech_recognition import HuggingFaceAutomaticSpeechRecognitionTransformer LOCAL_INFERENCE_CLASSES = [ + "HuggingFaceAutomaticSpeechRecognitionTransformer", + "HuggingFaceImage2TextTransformer", "HuggingFaceText2ImageDiffusor", + "HuggingFaceText2SpeechTransformer", "HuggingFaceTextGenerationTransformer", "HuggingFaceTextSummarizationTransformer", "HuggingFaceTextTranslationTransformer", - "HuggingFaceText2SpeechTransformer", - "HuggingFaceAutomaticSpeechRecognition", - "HuggingFaceImage2TextTransformer", - "HuggingFaceAutomaticSpeechRecognitionTransformer", ] REMOTE_INFERENCE_CLASSES = ["HuggingFaceTextGenerationParser"] __ALL__ = LOCAL_INFERENCE_CLASSES + REMOTE_INFERENCE_CLASSES diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py index fa2ca53bc..c627e8986 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py @@ -1,28 +1,29 @@ -from typing import Any, Dict, Literal, Optional, List, TYPE_CHECKING +from typing import Any, Dict, Optional, List, TYPE_CHECKING + +import torch +from transformers import pipeline, Pipeline from aiconfig import ParameterizedModelParser, InferenceOptions from aiconfig.callback 
import CallbackEvent -from pydantic import BaseModel -import torch from aiconfig.schema import Prompt, Output, ExecuteResult, Attachment -from transformers import pipeline, Pipeline if TYPE_CHECKING: from aiconfig import AIConfigRuntime -""" -Model Parser for HuggingFace ASR (Automatic Speech Recognition) models. -""" class HuggingFaceAutomaticSpeechRecognitionTransformer(ParameterizedModelParser): + """ + Model Parser for HuggingFace ASR (Automatic Speech Recognition) models. + """ + def __init__(self): """ Returns: - HuggingFaceAutomaticSpeechRecognition + HuggingFaceAutomaticSpeechRecognitionTransformer Usage: 1. Create a new model parser object with the model ID of the model to use. - parser = HuggingFaceAutomaticSpeechRecognition() + parser = HuggingFaceAutomaticSpeechRecognitionTransformer() 2. Add the model parser to the registry. config.register_model_parser(parser) """ @@ -55,7 +56,8 @@ async def serialize( Returns: str: Serialized representation of the prompt and inference settings. 
""" - raise NotImplementedError("serialize is not implemented for HuggingFaceAutomaticSpeechRecognition") + # TODO: See https://github.com/lastmile-ai/aiconfig/issues/822 + raise NotImplementedError("serialize is not implemented for HuggingFaceAutomaticSpeechRecognitionTransformer") async def deserialize( self, diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py index ddb0eb624..9d72ef9c2 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/image_2_text.py @@ -14,7 +14,6 @@ Attachment, ExecuteResult, Output, - OutputDataWithValue, Prompt, ) @@ -140,7 +139,6 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio outputs.append(output) prompt.outputs = outputs - print(f"{prompt.outputs=}") await aiconfig.callback_manager.run_callbacks( CallbackEvent( "on_run_complete", @@ -168,12 +166,9 @@ def get_output_text( output_data = output.data if isinstance(output_data, str): return output_data - if isinstance(output_data, OutputDataWithValue): - if isinstance(output_data.value, str): - return output_data.value - # HuggingFace Text summarization does not support function - # calls so shouldn't get here, but just being safe - return json.dumps(output_data.value, indent=2) + # HuggingFace image to text outputs should only ever be string + # format so shouldn't get here, but just being safe + return json.dumps(output_data, indent=2) return "" @@ -213,12 +208,19 @@ def construct_regular_output(result: Dict[str, str], execution_count: int) -> Ou return output -def validate_attachment_type_is_image(attachment: Attachment): +def validate_attachment_type_is_image( + prompt_name: str, + attachment: Attachment, +) -> None: + """ + Simple helper function to 
verify that the mimetype is set to a valid + image format. Raises ValueError if there's an issue. + """ if not hasattr(attachment, "mime_type"): - raise ValueError(f"Attachment has no mime type. Specify the image mimetype in the aiconfig") + raise ValueError(f"Attachment has no mime type for prompt '{prompt_name}'. Please specify the image mimetype in the AIConfig") if not attachment.mime_type.startswith("image/"): - raise ValueError(f"Invalid attachment mimetype {attachment.mime_type}. Expected image mimetype.") + raise ValueError(f"Invalid attachment mimetype {attachment.mime_type} for prompt '{prompt_name}'. Please use a mimetype that starts with 'image/'.") def validate_and_retrieve_images_from_attachments(prompt: Prompt) -> list[Union[str, Image]]: @@ -233,17 +235,17 @@ def validate_and_retrieve_images_from_attachments(prompt: Prompt) -> list[Union[ """ if not hasattr(prompt.input, "attachments") or len(prompt.input.attachments) == 0: - raise ValueError(f"No attachments found in input for prompt {prompt.name}. Please add an image attachment to the prompt input.") + raise ValueError(f"No attachments found in input for prompt '{prompt.name}'. Please add an image attachment to the prompt input.") images: list[Union[str, Image]] = [] for i, attachment in enumerate(prompt.input.attachments): - validate_attachment_type_is_image(attachment) + validate_attachment_type_is_image(prompt.name, attachment) input_data = attachment.data if not isinstance(input_data, str): - # See todo above, but for now only support uri's - raise ValueError(f"Attachment #{i} data is not a uri. Please specify a uri for the image attachment in prompt {prompt.name}.") + # See todo above, but for now only support uris and base64 + raise ValueError(f"Attachment #{i} data is not a uri or base64 string. Please specify a uri or base64 encoded string for the image attachment in prompt '{prompt.name}'.") # Really basic heurestic to check if the data is a base64 encoded str # vs. uri. 
This will be fixed once we have standardized inputs diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py index f61b1f8b8..f9a82fb13 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py @@ -2,6 +2,7 @@ import copy import io import itertools +import json import torch from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from diffusers import AutoPipelineForText2Image @@ -351,16 +352,21 @@ def get_output_text( # TODO (rossdanlm): Handle multiple outputs in list # https://github.com/lastmile-ai/aiconfig/issues/467 if output.output_type == "execute_result": - if isinstance(output.data, OutputDataWithStringValue): - return output.data.value - elif isinstance(output.data, str): - return output.data + output_data = output.data + if isinstance(output_data, OutputDataWithStringValue): + return output_data.value + # HuggingFace text to image outputs should only ever be in + # outputDataWithStringValue format so shouldn't get here, but + # just being safe + if isinstance(output_data, str): + return output_data + return json.dumps(output_data, indent=2) return "" def _get_device(self) -> str: if torch.cuda.is_available(): return "cuda" - elif torch.backends.mps.is_available(): + if torch.backends.mps.is_available(): return "mps" return "cpu" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py index 97e172fde..2474014cc 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py +++ 
b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py @@ -1,8 +1,8 @@ import base64 import copy import io +import json import numpy as np -import torch from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from transformers import Pipeline, pipeline from scipy.io.wavfile import write as write_wav @@ -12,7 +12,6 @@ from aiconfig.schema import ( ExecuteResult, Output, - OutputDataWithValue, Prompt, PromptMetadata, ) @@ -25,7 +24,7 @@ # Step 1: define Helpers def refine_pipeline_creation_params(model_settings: Dict[str, Any]) -> List[Dict[str, Any]]: - # There are from the transformers Github repo: + # These are from the transformers Github repo: # https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_utils.py#L2534 supported_keys = { "torch_dtype", @@ -228,8 +227,9 @@ def get_output_text( # TODO (rossdanlm): Handle multiple outputs in list # https://github.com/lastmile-ai/aiconfig/issues/467 if output.output_type == "execute_result": - if isinstance(output.data, OutputDataWithValue): - return output.data.value - elif isinstance(output.data, str): + if isinstance(output.data, str): return output.data + # HuggingFace text to speech outputs should only ever be string + # format so shouldn't get here, but just being safe + return json.dumps(output.data, indent=2) return "" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py index 4da5d7037..774365334 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py @@ -14,7 +14,6 @@ from aiconfig.schema import ( ExecuteResult, Output, - OutputDataWithValue, Prompt, PromptMetadata, ) @@ -26,7 +25,7 @@ # Step 1: define 
Helpers -def refine_chat_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]: +def refine_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]: """ Refines the completion params for the HF text generation api. Removes any unsupported params. The supported keys were found by looking at the HF text generation api. `huggingface_hub.InferenceClient.text_generation()` @@ -216,7 +215,7 @@ async def deserialize( """ # Build Completion data model_settings = self.get_model_settings(prompt, aiconfig) - completion_data = refine_chat_completion_params(model_settings) + completion_data = refine_completion_params(model_settings) #Add resolved prompt resolved_prompt = resolve_prompt(prompt, params, aiconfig) @@ -296,10 +295,8 @@ def get_output_text( output_data = output.data if isinstance(output_data, str): return output_data - if isinstance(output_data, OutputDataWithValue): - if isinstance(output_data.value, str): - return output_data.value - # HuggingFace Text generation does not support function - # calls so shouldn't get here, but just being safe - return json.dumps(output_data.value, indent=2) + # HuggingFace text generation outputs should only ever be in + # string format so shouldn't get here, but + # just being safe + return json.dumps(output_data, indent=2) return "" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py index 2b3b61358..532bbbadc 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py @@ -14,7 +14,6 @@ from aiconfig.schema import ( ExecuteResult, Output, - OutputDataWithValue, Prompt, PromptMetadata, ) @@ -26,7 +25,7 @@ # Step 1: define Helpers -def 
refine_chat_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]: +def refine_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]: """ Refines the completion params for the HF text summarization api. Removes any unsupported params. The supported keys were found by looking at the HF text summarization api. `huggingface_hub.InferenceClient.text_summarization()` @@ -221,7 +220,7 @@ async def deserialize( """ # Build Completion data model_settings = self.get_model_settings(prompt, aiconfig) - completion_data = refine_chat_completion_params(model_settings) + completion_data = refine_completion_params(model_settings) # Add resolved prompt resolved_prompt = resolve_prompt(prompt, params, aiconfig) @@ -301,10 +300,7 @@ def get_output_text( output_data = output.data if isinstance(output_data, str): return output_data - if isinstance(output_data, OutputDataWithValue): - if isinstance(output_data.value, str): - return output_data.value - # HuggingFace Text summarization does not support function - # calls so shouldn't get here, but just being safe - return json.dumps(output_data.value, indent=2) + # HuggingFace text summarization outputs should only ever be in + # string format so shouldn't get here, but just being safe + return json.dumps(output_data, indent=2) return "" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py index 860a11e46..5100e3e24 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py @@ -14,7 +14,6 @@ from aiconfig.schema import ( ExecuteResult, Output, - OutputDataWithValue, Prompt, PromptMetadata, ) @@ -26,7 +25,7 @@ # Step 1: define Helpers -def refine_chat_completion_params(model_settings: 
Dict[str, Any]) -> Dict[str, Any]: +def refine_completion_params(model_settings: Dict[str, Any]) -> Dict[str, Any]: """ Refines the completion params for the HF text translation api. Removes any unsupported params. The supported keys were found by looking at the HF text translation api. `huggingface_hub.InferenceClient.text_translation()` @@ -223,7 +222,7 @@ async def deserialize( """ # Build Completion data model_settings = self.get_model_settings(prompt, aiconfig) - completion_data = refine_chat_completion_params(model_settings) + completion_data = refine_completion_params(model_settings) # Add resolved prompt resolved_prompt = resolve_prompt(prompt, params, aiconfig) @@ -304,10 +303,7 @@ def get_output_text( output_data = output.data if isinstance(output_data, str): return output_data - if isinstance(output_data, OutputDataWithValue): - if isinstance(output_data.value, str): - return output_data.value - # HuggingFace Text translation does not support function - # calls so shouldn't get here, but just being safe - return json.dumps(output_data.value, indent=2) + # HuggingFace text translation outputs should only ever be in + # string format so shouldn't get here, but just being safe + return json.dumps(output_data, indent=2) return "" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/remote_inference_client/text_generation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/remote_inference_client/text_generation.py index 896cee119..67406f811 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/remote_inference_client/text_generation.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/remote_inference_client/text_generation.py @@ -15,7 +15,6 @@ from aiconfig.schema import ( ExecuteResult, Output, - OutputDataWithValue, Prompt, PromptMetadata, ) @@ -29,9 +28,7 @@ # Step 1: define Helpers - - -def refine_chat_completion_params(model_settings: dict[Any, Any]) -> 
dict[str, Any]: +def refine_completion_params(model_settings: dict[Any, Any]) -> dict[str, Any]: """ Refines the completion params for the HF text generation api. Removes any unsupported params. The supported keys were found by looking at the HF text generation api. `huggingface_hub.InferenceClient.text_generation()` @@ -243,7 +240,7 @@ async def deserialize( # Build Completion data model_settings = self.get_model_settings(prompt, aiconfig) - completion_data = refine_chat_completion_params(model_settings) + completion_data = refine_completion_params(model_settings) completion_data["prompt"] = resolved_prompt @@ -318,10 +315,8 @@ def get_output_text( output_data = output.data if isinstance(output_data, str): return output_data - if isinstance(output_data, OutputDataWithValue): - if isinstance(output_data.value, str): - return output_data.value - # HuggingFace Text generation does not support function - # calls so shouldn't get here, but just being safe - return json.dumps(output_data.value, indent=2) + + # HuggingFace text generation outputs should only ever be string + # format so shouldn't get here, but just being safe + return json.dumps(output_data, indent=2) return ""