
feat: Integration of VLM embedding model #446

Merged on Jun 5, 2024

Commits (57)
d300035
add e5 embedding
Wendong-Fan Nov 23, 2023
524bfd4
fix typo in toml file
Wendong-Fan Nov 25, 2023
0f13021
allow user to switch embedding model from SentenceTransformer
Wendong-Fan Nov 30, 2023
9ddc871
Move the import to __init__
Wendong-Fan Nov 30, 2023
e9c3135
polish docstring
Wendong-Fan Nov 30, 2023
aeae92d
remove # type: ignore
Wendong-Fan Nov 30, 2023
b431e67
change embed_list return type and polish docstring
Wendong-Fan Nov 30, 2023
884f190
use Union[List[List[float]], ndarray] instead of List[List[float]] | …
Wendong-Fan Nov 30, 2023
9cce263
change return of embed_list from ndarray to list
Wendong-Fan Dec 3, 2023
4c7b67c
change name from SentenceTransformerEmbedding into SentenceTransforme…
Wendong-Fan Dec 3, 2023
939808e
update poetry
lightaime Dec 3, 2023
e8ce692
update poetry
lightaime Dec 3, 2023
1bf7320
update poetry
lightaime Dec 3, 2023
653b381
update poetry
lightaime Dec 3, 2023
93e795e
remove ndarray and union in embedding base file
Wendong-Fan Dec 8, 2023
a50b478
Merge branch 'master' into feature/open_source_embedding_model
Wendong-Fan Dec 8, 2023
4d5ba2d
sentence-transformer
FUYICC Jan 30, 2024
692a670
integration of clip embedding and update of license
FUYICC Feb 1, 2024
20654fd
Limit embed_list input type
FUYICC Feb 3, 2024
9e0de62
revert changes of sentence embedding
FUYICC Feb 5, 2024
b3ea26c
poetry change of pillow
FUYICC Feb 5, 2024
f1adf18
change of docstring of functions
FUYICC Feb 5, 2024
8ca7195
change of get_output_dim function
FUYICC Feb 24, 2024
c0f2b85
fix of bugs of embedding dim
FUYICC Feb 24, 2024
955bf11
allow the clip embedding accept both texts and images
FUYICC Feb 25, 2024
afb46bf
fix the bug for pytest
FUYICC Feb 28, 2024
9f98e8a
fix the bug for poetry.lock
FUYICC Feb 29, 2024
79e6d8d
refactor: refactor CLIPEmbedding class to improve readability and doc…
Appointat Mar 8, 2024
2e16ed6
chore: remove empty line in pyproject.toml
Appointat Mar 8, 2024
d5e10fb
chore: add specific test cases for image and text embeddings
Appointat Mar 8, 2024
f41f3c2
fix: fix error handling in CLIPEmbedding class
Appointat Mar 8, 2024
ddf78af
typo: fix default value capitalization in CLIPEmbedding class
Appointat Mar 8, 2024
8fe17cb
Use generics to support the type system
FUYICC Mar 11, 2024
1afd27b
store dimension into a variable
FUYICC Mar 11, 2024
e8d073d
Update update_license.py for windows compatibility
FUYICC Mar 12, 2024
f0a1573
Change to general visual language model class and use lazy initializa…
FUYICC Apr 9, 2024
0fc220d
Merge branch 'master' into CLIP_model
FUYICC Apr 12, 2024
1fa0c0f
test for inconsistency of inputs with different types
FUYICC Apr 12, 2024
71d48a2
update of poetry
FUYICC Apr 12, 2024
4de4fad
usage of **kwargs
FUYICC Apr 12, 2024
1517d52
debug for pytest
FUYICC May 2, 2024
2105510
Merge branch 'master' into CLIP_model
FUYICC May 3, 2024
ed54edf
poetry dependency
FUYICC May 3, 2024
a667614
ruff
FUYICC May 3, 2024
8aab43d
poetry
FUYICC May 3, 2024
8c1f086
return list of float
FUYICC May 5, 2024
b8bd94e
change of tests
FUYICC May 5, 2024
de718ce
Update camel/embeddings/vlm_embedding.py
FUYICC May 27, 2024
6ebf5cd
Update camel/embeddings/vlm_embedding.py
FUYICC May 27, 2024
c969597
Update camel/embeddings/vlm_embedding.py
FUYICC May 27, 2024
6b2c48e
Update camel/embeddings/vlm_embedding.py
FUYICC May 27, 2024
b0cadb0
Update camel/embeddings/vlm_embedding.py
FUYICC May 27, 2024
e2c7824
one method for **kwargs
FUYICC May 27, 2024
1c23c64
split of kwargs
FUYICC Jun 2, 2024
487dfca
add pillow into tool.poetry.extras
FUYICC Jun 2, 2024
908bb91
Merge branch 'master' into CLIP_model
FUYICC Jun 2, 2024
6b5db36
poetry lock
FUYICC Jun 2, 2024
Files changed (diff shown from 35 of 57 commits)
2 changes: 2 additions & 0 deletions camel/embeddings/__init__.py
@@ -13,8 +13,10 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from .base import BaseEmbedding
from .openai_embedding import OpenAIEmbedding
from .clip_embedding import CLIPEmbedding

__all__ = [
"BaseEmbedding",
"OpenAIEmbedding",
"CLIPEmbedding",
]
91 changes: 91 additions & 0 deletions camel/embeddings/clip_embedding.py
@@ -0,0 +1,91 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, List, Union

from PIL import Image

from camel.embeddings import BaseEmbedding


class CLIPEmbedding(BaseEmbedding[Union[str, Image.Image]]):
r"""Provides image embedding functionalities using CLIP model.

Args:
model_name (str, optional): The model type to be used for generating
embeddings. (default: :obj:`openai/clip-vit-base-patch32`)

Raises:
RuntimeError: If an unsupported model type is specified.
"""

def __init__(self,
model_name: str = "openai/clip-vit-base-patch32") -> None:
r"""Initializes the: obj: `CLIPEmbedding` class with a specified model
and return the dimension of embeddings.

Args:
model_name (str, optional): The version name of the model to use.
(default: :obj:`openai/clip-vit-base-patch32`)
"""

from transformers import CLIPModel, CLIPProcessor
self.model = CLIPModel.from_pretrained(model_name)
self.processor = CLIPProcessor.from_pretrained(model_name)
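# Probe the model once with a dummy text to discover the embedding dimension.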
text = 'dimension'
inputs = self.processor(text=[text], return_tensors="pt")
self.dim = self.model.get_text_features(**inputs).shape[1]

def embed_list(
self,
objs: List[Union[Image.Image, str]], # to do
**kwargs: Any,
Review comment (Member):

It is noticed that the **kwargs parameter doesn't appear to be utilized within the function's implementation, and it's also not covered by the existing unit tests. Could we enhance our test suite by including tests that verify the handling of **kwargs? This would ensure that all aspects of the function's behavior are thoroughly tested. Thanks.

) -> List[List[float]]:
r"""Generates embeddings for the given images or texts.

Args:
objs (List[Image.Image|str]): The list of images or texts for
which to generate the embeddings.
**kwargs (Any): Extra kwargs passed to the embedding API.

Returns:
List[List[float]]: A list of generated embeddings, each represented
as a list of floating-point numbers.
"""
if not objs:
raise ValueError("Input text list is empty.")
result_list = []
for obj in objs:
if isinstance(obj, Image.Image):
input = self.processor(images=obj, return_tensors="pt",
padding=True)
image_feature = self.model.get_image_features(**input).tolist()
result_list.extend(image_feature)
elif isinstance(obj, str):
input = self.processor(text=obj, return_tensors="pt",
padding=True)
text_feature = self.model.get_text_features(**input).tolist()
result_list.extend(text_feature)

else:
raise ValueError("Input type is not image nor text.")
return result_list

def get_output_dim(self) -> int:
r"""Returns the output dimension of the embeddings.

Returns:
int: The dimensionality of the embedding for the current model.
"""

return self.dim
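
A minimal usage sketch of the new class (it assumes the transformers, torch, and pillow dependencies are installed and that model weights can be downloaded; the image URL mirrors the one used in the tests below):

import requests
from PIL import Image

from camel.embeddings import CLIPEmbedding

# Instantiate with the default checkpoint; weights are downloaded on first use.
embedding = CLIPEmbedding()

# Texts and images can be mixed in a single call; each entry yields one vector.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
vectors = embedding.embed_list(["two cats on a couch", image])

assert len(vectors) == 2
assert all(len(v) == embedding.get_output_dim() for v in vectors)
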
4 changes: 2 additions & 2 deletions licenses/update_license.py
@@ -39,10 +39,10 @@ def update_license_in_file(
start_line_start_with: str,
end_line_start_with: str,
) -> bool:
with open(file_path, 'r') as f:
with open(file_path, 'r', encoding='utf-8') as f:  # for Windows compatibility
content = f.read()

with open(license_template_path, 'r') as f:
with open(license_template_path, 'r', encoding='utf-8') as f:
new_license = f.read().strip()

maybe_existing_licenses = re.findall(r'^#.*?(?=\n)', content,
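
For context on this change: without an explicit encoding, open() falls back to the platform default (often cp1252 on Windows), so reading a UTF-8 source file that contains non-ASCII characters can raise UnicodeDecodeError. A minimal illustration with a hypothetical path:

# Hypothetical illustration of the failure the change guards against.
path = "camel/some_module.py"  # assume this file is UTF-8 and contains non-ASCII text

# On Windows this may decode with cp1252 and fail with UnicodeDecodeError.
with open(path, "r") as f:
    content = f.read()

# An explicit encoding makes the read behave the same on every platform.
with open(path, "r", encoding="utf-8") as f:
    content = f.read()
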
3 changes: 2 additions & 1 deletion poetry.lock


1 change: 1 addition & 0 deletions pyproject.toml
@@ -55,6 +55,7 @@ PyMuPDF = { version = "^1.22.5", optional = true }
wikipedia = { version = "^1", optional = true }
pyowm = { version = "^3.3.0", optional = true }
unstructured = { version = "^0.10.30", optional = true }
pillow = { version = "^10.2.0", optional = true }

# vector-databases
qdrant-client = { version = "^1.6.4", optional = true }
69 changes: 69 additions & 0 deletions test/embeddings/test_clip_embeddings.py
@@ -0,0 +1,69 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import pytest
import requests
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

from camel.embeddings import CLIPEmbedding


def test_CLIPEmbedding_initialization():
embedding = CLIPEmbedding()
Review comment (Member):

Can we create some mock tests for the embedding instead of downloading the model every time (which is quite expensive)?

Reply (Contributor, Author):

Yes, you are right.

assert embedding is not None
assert isinstance(embedding.model, CLIPModel)
assert isinstance(embedding.processor, CLIPProcessor)


def test_image_embed_list_with_valid_input():
embedding = CLIPEmbedding()
# Test with the specific images
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
test_images = [image, image]
embeddings = embedding.embed_list(test_images)
assert isinstance(embeddings, list)
assert len(embeddings) == 2
for e in embeddings:
assert len(e) == embedding.get_output_dim()


def test_image_embed_list_with_empty_input():
embedding = CLIPEmbedding()
with pytest.raises(ValueError):
embedding.embed_list([])


def test_text_embed_list_with_valid_input():
embedding = CLIPEmbedding()
# Test with the specific texts
test_texts = ['Hello world', 'Testing sentence embeddings']
embeddings = embedding.embed_list(test_texts)
assert isinstance(embeddings, list)
assert len(embeddings) == 2
for e in embeddings:
assert len(e) == embedding.get_output_dim()


def test_text_embed_list_with_empty_input():
embedding = CLIPEmbedding()
with pytest.raises(ValueError):
embedding.embed_list([])


def test_get_output_dim():
embedding = CLIPEmbedding()
output_dim = embedding.get_output_dim()
assert isinstance(output_dim, int)
assert output_dim > 0
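
As a follow-up to the review comments above, a possible mock-based test that avoids downloading the CLIP weights; the patched targets and the 512-dimensional fake features are assumptions, not part of this PR, and transformers and torch still need to be installed (no network access is required):

from unittest.mock import MagicMock, patch

import torch

from camel.embeddings import CLIPEmbedding


@patch("transformers.CLIPProcessor.from_pretrained")
@patch("transformers.CLIPModel.from_pretrained")
def test_embed_list_with_mocked_model(mock_model_loader, mock_processor_loader):
    # Fake model that returns 512-dimensional features for both modalities.
    fake_model = MagicMock()
    fake_model.get_text_features.return_value = torch.zeros(1, 512)
    fake_model.get_image_features.return_value = torch.zeros(1, 512)
    mock_model_loader.return_value = fake_model

    # Fake processor that returns a tensor dict regardless of the input.
    fake_processor = MagicMock(
        return_value={"input_ids": torch.zeros(1, 3, dtype=torch.long)})
    mock_processor_loader.return_value = fake_processor

    embedding = CLIPEmbedding()
    vectors = embedding.embed_list(["hello", "world"])

    assert len(vectors) == 2
    assert embedding.get_output_dim() == 512
    assert all(len(v) == 512 for v in vectors)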