From d04300a6af671788e478c5d4dfdb93763a729615 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 17:53:19 +0000 Subject: [PATCH 01/12] Add an example of object-segmentation (ClipSeg) using graph mode. --- .../object-segementation/ClipSeg/README.md | 32 ++++++ .../ClipSeg/run_example.py | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 examples/object-segementation/ClipSeg/README.md create mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md new file mode 100644 index 000000000..f476a5a4e --- /dev/null +++ b/examples/object-segementation/ClipSeg/README.md @@ -0,0 +1,32 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using ClipSeg with graph mode. + +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "CIDAS/clipseg-rd64-refined" \ + --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ + --prompt "a cat, a remote, a blanket" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py new file mode 100644 index 000000000..d4aa3efae --- /dev/null +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="CIDAS/clipseg-rd64-refined", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="http://images.cocodataset.org/val2017/000000039769.jpg", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--prompt", + default="a cat, a remote, a blanket", + type=str, + help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw) + texts = [] + for text in args.prompt.split(','): + texts.append(text) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + logits = outputs.logits + print(logits.shape) + print("Logits: " + str(logits)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From f1ccb13dd2a12c5ae793151209fe1e370f6da147 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 18:44:43 +0000 Subject: [PATCH 02/12] Updated readme and added codes for generating segmented images. 
--- examples/object-segementation/ClipSeg/README.md | 2 +- examples/object-segementation/ClipSeg/run_example.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md index f476a5a4e..d8c76272a 100644 --- a/examples/object-segementation/ClipSeg/README.md +++ b/examples/object-segementation/ClipSeg/README.md @@ -21,7 +21,7 @@ This directory contains an example script that demonstrates using ClipSeg with g python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "a cat, a remote, a blanket" \ + --prompt "cat, remote, blanket" \ --warmup 3 \ --n_iterations 20 \ --use_hpu_graphs \ diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py index d4aa3efae..a10b8fa9d 100644 --- a/examples/object-segementation/ClipSeg/run_example.py +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -23,6 +23,7 @@ import habana_frameworks.torch.core as htcore import time import argparse +from torchvision.utils import save_image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -99,9 +100,14 @@ total_model_time = total_model_time + (model_end_time - model_start_time) if args.print_result: - logits = outputs.logits - print(logits.shape) - print("Logits: " + str(logits)) + if (i == 0): # generate/output once only + logits = outputs.logits + for j in range(logits.shape[0]): + threshold = 0.5 + segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) + segmented_image = segmented_image.to(torch.float32) + save_image(segmented_image, 'segmented' + texts[j] + '.png') + print('Segmented images are generated.') print("n_iterations: " + str(args.n_iterations)) print("Total latency (ms): " + str(total_model_time*1000)) From bc093acede956ed4b1476a637fbe5ebec1720743 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 18 Mar 2024 18:08:06 +0000 Subject: [PATCH 03/12] Added an example code of SAM model. --- .../SegmentAnythingModel/README.md | 33 ++++++ .../SegmentAnythingModel/run_example.py | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 examples/object-segementation/SegmentAnythingModel/README.md create mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md new file mode 100644 index 000000000..8a0f24300 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -0,0 +1,33 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using SAM with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py new file mode 100644 index 000000000..592f7f429 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/run_example.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/facebook/sam-vit-base + +from transformers import SamModel, SamProcessor +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="facebook/sam-vit-huge", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--point_prompt", + default="450, 600", + type=str, + help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to save the segmentation result.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = SamProcessor.from_pretrained(args.model_name_or_path) + model = SamModel.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") + points = [] + for text in args.point_prompt.split(','): + points.append(int(text)) + points = [[points]] + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + if (i == 0): # generate/output once only + iou = outputs.iou_scores + print("iou score: " + str(iou)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 23489864642aad77e20408a94b05c6321249b455 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 20:41:46 +0000 Subject: [PATCH 04/12] Add this example to README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fabff9e26..5260e0625 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ The following model architectures, tasks and device distributions have been vali | OWLViT | |
<li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 | ClipSeg | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-
+| Segment Anything Model | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation/SegmentAnythingModel)</li> |
  • | - Diffusers: From d7816c22220a5559a9f0dae07448797b0b807aed Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 20:49:38 +0000 Subject: [PATCH 05/12] Expose the example in index.mdx. --- docs/source/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index b33cfd062..5afc2b9bf 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -72,7 +72,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be | OWLViT | |
<li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 | ClipSeg | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-
+| SAM | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation/SegmentAnythingModel)</li> |
  • | - Diffusers From 8a27e86586486d106d8638c251c7065cc7b6aa0b Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 20:53:07 +0000 Subject: [PATCH 06/12] Fixed the typo of the model name. --- examples/object-segementation/SegmentAnythingModel/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md index 8a0f24300..73a7129b2 100644 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -11,7 +11,7 @@ See the License for the specific language governing permissions and limitations under the License. --> -# Owl-ViT Examples +# Segment Anything Model Examples This directory contains an example script that demonstrates using SAM with graph mode. From a16985b0e23211b7d2aef31241b0d934db3d97b2 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 21:03:29 +0000 Subject: [PATCH 07/12] Remove unrelated files. --- .../object-segementation/ClipSeg/README.md | 32 ----- .../ClipSeg/run_example.py | 114 ------------------ 2 files changed, 146 deletions(-) delete mode 100644 examples/object-segementation/ClipSeg/README.md delete mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md deleted file mode 100644 index d8c76272a..000000000 --- a/examples/object-segementation/ClipSeg/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using ClipSeg with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "CIDAS/clipseg-rd64-refined" \ - --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "cat, remote, blanket" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py deleted file mode 100644 index a10b8fa9d..000000000 --- a/examples/object-segementation/ClipSeg/run_example.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg - -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from torchvision.utils import save_image - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="CIDAS/clipseg-rd64-refined", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="http://images.cocodataset.org/val2017/000000039769.jpg", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--prompt", - default="a cat, a remote, a blanket", - type=str, - help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to print the classification results.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = AutoProcessor.from_pretrained(args.model_name_or_path) - model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw) - texts = [] - for text in args.prompt.split(','): - texts.append(text) - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - logits = outputs.logits - for j in range(logits.shape[0]): - threshold = 0.5 - segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) - segmented_image = segmented_image.to(torch.float32) - save_image(segmented_image, 'segmented' + texts[j] + '.png') - print('Segmented images are generated.') - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 
093158f8b4cf1a0eba163a0b23db02cc99365d55 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 15 Apr 2024 20:43:46 +0000 Subject: [PATCH 08/12] Added test case for SAM model. --- tests/test_modelenabling.py | 121 ++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 tests/test_modelenabling.py diff --git a/tests/test_modelenabling.py b/tests/test_modelenabling.py new file mode 100644 index 000000000..ab279bee6 --- /dev/null +++ b/tests/test_modelenabling.py @@ -0,0 +1,121 @@ +# coding=utf-8 +# Copyright 2024 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import requests +from PIL import Image +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from transformers import OwlViTProcessor, OwlViTForObjectDetection, SamProcessor, SamModel +import unittest +from unittest import TestCase +import numpy as np +import os + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +adapt_transformers_to_gaudi() + +# For Gaudi 2 +LATENCY_OWLVIT_BF16_GRAPH_BASELINE = 3.7109851837158203 +LATENCY_SAM_BF16_GRAPH_BASELINE = 98.92215728759766 + +class GaudiSAMTester(TestCase): + """ + Tests for Segment Anything Model - SAM + """ + def prepare_model_and_processor(self): + model = SamModel.from_pretrained("facebook/sam-vit-huge").to("hpu") + processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + model = model.eval() + return model, processor + + def prepare_data(self): + image = Image.open(requests.get("https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", stream=True).raw).convert("RGB") + input_points = [[[450, 600]]] + return input_points, image + + def test_inference_default(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + self.assertLess(np.abs(scores.cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_inference_bf16(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + + with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + 
self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_inference_hpu_graphs(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + + model = ht.hpu.wrap_in_hpu_graph(model) #Apply graph + + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_no_latency_regression_bf16(self): + warmup = 3 + iterations = 10 + + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + + model = ht.hpu.wrap_in_hpu_graph(model) + + with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): + for i in range(warmup): + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(iterations): + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + latency = total_model_time*1000/iterations # in terms of ms + self.assertGreaterEqual(latency, 0.95 * LATENCY_SAM_BF16_GRAPH_BASELINE) + +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file From 58042ebd472093b14944e56df629c1abf45fa596 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 22 Apr 2024 17:59:33 +0000 Subject: [PATCH 09/12] Renamed the test python file and add the related test into CI test. --- Makefile | 5 ++ tests/test_image_segmentation.py | 121 +++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tests/test_image_segmentation.py diff --git a/Makefile b/Makefile index 6e87a399a..bec72a57b 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,11 @@ fast_tests_diffusers: python -m pip install .[tests] python -m pytest tests/test_diffusers.py +# Run unit and integration tests related to Image segmentation +fast_tests_image_segmentation: + python -m pip install .[tests] + python -m pytest tests/test_image_segmentation.py + # Run single-card non-regression tests slow_tests_1x: test_installs python -m pytest tests/test_examples.py -v -s -k "single_card" diff --git a/tests/test_image_segmentation.py b/tests/test_image_segmentation.py new file mode 100644 index 000000000..ab279bee6 --- /dev/null +++ b/tests/test_image_segmentation.py @@ -0,0 +1,121 @@ +# coding=utf-8 +# Copyright 2024 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import requests +from PIL import Image +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from transformers import OwlViTProcessor, OwlViTForObjectDetection, SamProcessor, SamModel +import unittest +from unittest import TestCase +import numpy as np +import os + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +adapt_transformers_to_gaudi() + +# For Gaudi 2 +LATENCY_OWLVIT_BF16_GRAPH_BASELINE = 3.7109851837158203 +LATENCY_SAM_BF16_GRAPH_BASELINE = 98.92215728759766 + +class GaudiSAMTester(TestCase): + """ + Tests for Segment Anything Model - SAM + """ + def prepare_model_and_processor(self): + model = SamModel.from_pretrained("facebook/sam-vit-huge").to("hpu") + processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + model = model.eval() + return model, processor + + def prepare_data(self): + image = Image.open(requests.get("https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", stream=True).raw).convert("RGB") + input_points = [[[450, 600]]] + return input_points, image + + def test_inference_default(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + self.assertLess(np.abs(scores.cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_inference_bf16(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + + with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_inference_hpu_graphs(self): + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + + model = ht.hpu.wrap_in_hpu_graph(model) #Apply graph + + outputs = model(**inputs) + masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) + scores = outputs.iou_scores + scores = scores[0][0] + expected_scores = np.array([0.9912, 0.9818, 0.9666]) + self.assertEqual(len(scores), 3) + self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) + + def test_no_latency_regression_bf16(self): + warmup = 3 + iterations = 10 + + model, processor = self.prepare_model_and_processor() + input_points, image = self.prepare_data() + + model = ht.hpu.wrap_in_hpu_graph(model) + + 
with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): + for i in range(warmup): + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(iterations): + inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + latency = total_model_time*1000/iterations # in terms of ms + self.assertGreaterEqual(latency, 0.95 * LATENCY_SAM_BF16_GRAPH_BASELINE) + +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file From 5cd8f2b0e18f68cdfaf9d3fe46dcdbb0125f2c94 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Tue, 30 Apr 2024 23:30:45 +0000 Subject: [PATCH 10/12] Aligned the file architecture by moving the files under object-segmentation. Used Automodel and related processor to replace model-specific API. Improved the testing logic. --- examples/object-segementation/README.md | 23 +++- .../SegmentAnythingModel/README.md | 33 ----- .../run_example.py => run_example_sam.py} | 17 +-- tests/test_image_segmentation.py | 35 ++--- tests/test_modelenabling.py | 121 ------------------ 5 files changed, 44 insertions(+), 185 deletions(-) delete mode 100644 examples/object-segementation/SegmentAnythingModel/README.md rename examples/object-segementation/{SegmentAnythingModel/run_example.py => run_example_sam.py} (93%) delete mode 100644 tests/test_modelenabling.py diff --git a/examples/object-segementation/README.md b/examples/object-segementation/README.md index 4afb59849..3204acab3 100644 --- a/examples/object-segementation/README.md +++ b/examples/object-segementation/README.md @@ -13,10 +13,12 @@ limitations under the License. # Object Segmentation Examples -This directory contains an example script that demonstrates how to perform object segmentation on Gaudi with graph mode. +This directory contains two example script that demonstrates how to perform object segmentation on Gaudi with graph mode. 
## Single-HPU inference +### ClipSeg Model + ```bash python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ @@ -29,4 +31,21 @@ python3 run_example.py \ --print_result ``` Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) + +### Segment Anything Model + +```bash +python3 run_example_sam.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md deleted file mode 100644 index 73a7129b2..000000000 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# Segment Anything Model Examples - -This directory contains an example script that demonstrates using SAM with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "facebook/sam-vit-huge" \ - --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ - --point_prompt "450,600" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) - - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/run_example_sam.py similarity index 93% rename from examples/object-segementation/SegmentAnythingModel/run_example.py rename to examples/object-segementation/run_example_sam.py index 592f7f429..016b318be 100644 --- a/examples/object-segementation/SegmentAnythingModel/run_example.py +++ b/examples/object-segementation/run_example_sam.py @@ -15,17 +15,18 @@ # Copied from https://huggingface.co/facebook/sam-vit-base -from transformers import SamModel, SamProcessor -from PIL import Image +import argparse +import time + +import habana_frameworks.torch as ht import requests import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse +from PIL import Image +from transformers import AutoModel, AutoProcessor from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -69,8 +70,8 @@ adapt_transformers_to_gaudi() - processor = SamProcessor.from_pretrained(args.model_name_or_path) - model = SamModel.from_pretrained(args.model_name_or_path) + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = AutoModel.from_pretrained(args.model_name_or_path) image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") points = [] diff --git a/tests/test_image_segmentation.py b/tests/test_image_segmentation.py index ab279bee6..cae5042af 100644 --- a/tests/test_image_segmentation.py +++ b/tests/test_image_segmentation.py @@ -13,21 
+13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests -from PIL import Image -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore import time -import argparse -from transformers import OwlViTProcessor, OwlViTForObjectDetection, SamProcessor, SamModel -import unittest from unittest import TestCase + +import habana_frameworks.torch as ht import numpy as np -import os +import requests +import torch +from PIL import Image +from transformers import AutoModel, AutoProcessor from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + adapt_transformers_to_gaudi() # For Gaudi 2 @@ -39,8 +37,8 @@ class GaudiSAMTester(TestCase): Tests for Segment Anything Model - SAM """ def prepare_model_and_processor(self): - model = SamModel.from_pretrained("facebook/sam-vit-huge").to("hpu") - processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") + model = AutoModel.from_pretrained("facebook/sam-vit-huge").to("hpu") + processor = AutoProcessor.from_pretrained("facebook/sam-vit-huge") model = model.eval() return model, processor @@ -54,7 +52,6 @@ def test_inference_default(self): input_points, image = self.prepare_data() inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") outputs = model(**inputs) - masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) scores = outputs.iou_scores scores = scores[0][0] expected_scores = np.array([0.9912, 0.9818, 0.9666]) @@ -68,7 +65,6 @@ def test_inference_bf16(self): with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 outputs = model(**inputs) - masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) scores = outputs.iou_scores scores = scores[0][0] expected_scores = np.array([0.9912, 0.9818, 0.9666]) @@ -83,7 +79,6 @@ def test_inference_hpu_graphs(self): model = ht.hpu.wrap_in_hpu_graph(model) #Apply graph outputs = model(**inputs) - masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) scores = outputs.iou_scores scores = scores[0][0] expected_scores = np.array([0.9912, 0.9818, 0.9666]) @@ -102,20 +97,18 @@ def test_no_latency_regression_bf16(self): with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): for i in range(warmup): inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - outputs = model(**inputs) + _ = model(**inputs) torch.hpu.synchronize() - + total_model_time = 0 for i in range(iterations): inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") model_start_time = time.time() - outputs = model(**inputs) + _ = model(**inputs) torch.hpu.synchronize() model_end_time = time.time() total_model_time = total_model_time + (model_end_time - model_start_time) - + latency = total_model_time*1000/iterations # in terms of ms - self.assertGreaterEqual(latency, 0.95 * LATENCY_SAM_BF16_GRAPH_BASELINE) + self.assertLessEqual(latency, 1.05 * LATENCY_SAM_BF16_GRAPH_BASELINE) -# if __name__ == '__main__': -# unittest.main() \ No newline at end of file diff --git a/tests/test_modelenabling.py b/tests/test_modelenabling.py deleted file mode 100644 index ab279bee6..000000000 --- 
a/tests/test_modelenabling.py +++ /dev/null @@ -1,121 +0,0 @@ -# coding=utf-8 -# Copyright 2024 HuggingFace Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import requests -from PIL import Image -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from transformers import OwlViTProcessor, OwlViTForObjectDetection, SamProcessor, SamModel -import unittest -from unittest import TestCase -import numpy as np -import os - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -adapt_transformers_to_gaudi() - -# For Gaudi 2 -LATENCY_OWLVIT_BF16_GRAPH_BASELINE = 3.7109851837158203 -LATENCY_SAM_BF16_GRAPH_BASELINE = 98.92215728759766 - -class GaudiSAMTester(TestCase): - """ - Tests for Segment Anything Model - SAM - """ - def prepare_model_and_processor(self): - model = SamModel.from_pretrained("facebook/sam-vit-huge").to("hpu") - processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") - model = model.eval() - return model, processor - - def prepare_data(self): - image = Image.open(requests.get("https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", stream=True).raw).convert("RGB") - input_points = [[[450, 600]]] - return input_points, image - - def test_inference_default(self): - model, processor = self.prepare_model_and_processor() - input_points, image = self.prepare_data() - inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) - scores = outputs.iou_scores - scores = scores[0][0] - expected_scores = np.array([0.9912, 0.9818, 0.9666]) - self.assertEqual(len(scores), 3) - self.assertLess(np.abs(scores.cpu().detach().numpy() - expected_scores).max(), 0.02) - - def test_inference_bf16(self): - model, processor = self.prepare_model_and_processor() - input_points, image = self.prepare_data() - inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - - with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 - outputs = model(**inputs) - masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) - scores = outputs.iou_scores - scores = scores[0][0] - expected_scores = np.array([0.9912, 0.9818, 0.9666]) - self.assertEqual(len(scores), 3) - self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) - - def test_inference_hpu_graphs(self): - model, processor = self.prepare_model_and_processor() - input_points, image = self.prepare_data() - inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - - model = ht.hpu.wrap_in_hpu_graph(model) #Apply graph - - outputs = model(**inputs) - masks = 
processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu()) - scores = outputs.iou_scores - scores = scores[0][0] - expected_scores = np.array([0.9912, 0.9818, 0.9666]) - self.assertEqual(len(scores), 3) - self.assertLess(np.abs(scores.to(torch.float32).cpu().detach().numpy() - expected_scores).max(), 0.02) - - def test_no_latency_regression_bf16(self): - warmup = 3 - iterations = 10 - - model, processor = self.prepare_model_and_processor() - input_points, image = self.prepare_data() - - model = ht.hpu.wrap_in_hpu_graph(model) - - with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): - for i in range(warmup): - inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(iterations): - inputs = processor(image, input_points=input_points, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - latency = total_model_time*1000/iterations # in terms of ms - self.assertGreaterEqual(latency, 0.95 * LATENCY_SAM_BF16_GRAPH_BASELINE) - -# if __name__ == '__main__': -# unittest.main() \ No newline at end of file From 09a1a335b191973952eb863ed8dc7dee5c6725bd Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Tue, 30 Apr 2024 23:41:55 +0000 Subject: [PATCH 11/12] Update README.md and index.mdx. --- README.md | 2 +- docs/source/index.mdx | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5260e0625..9c88e1481 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ The following model architectures, tasks and device distributions have been vali | OWLViT | |
<li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 | ClipSeg | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Segment Anything Model | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation/SegmentAnythingModel)</li> |
+| Segment Anything Model | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
  • | - Diffusers: diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 5afc2b9bf..132630c63 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -47,7 +47,11 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be | GPT-J |
<li>DeepSpeed</li> | <li>Single card</li><li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | GPT-NeoX | <li>DeepSpeed</li> | <li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | OPT | | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+<<<<<<< HEAD
 | Llama 2 / CodeLlama / Llama 3 / Llama Guard | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
+=======
+| Llama 2 / CodeLlama / Llama 3 | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li> |
+>>>>>>> 455d728 (Update README.md and index.mdx.)
 | StableLM | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | Falcon | <li>LoRA</li> | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | CodeGen | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
@@ -72,7 +76,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | OWLViT | | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 | ClipSeg | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| SAM | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation/SegmentAnythingModel)</li> |
+| SAM | | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
  • | - Diffusers From b398475f0c4171cbd27f4cb32adb8c15c63ec996 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Wed, 29 May 2024 21:07:30 +0000 Subject: [PATCH 12/12] Update typos. --- docs/source/index.mdx | 4 ---- examples/object-segementation/README.md | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 132630c63..22b8dcba5 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -47,11 +47,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be | GPT-J |
<li>DeepSpeed</li> | <li>Single card</li><li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | GPT-NeoX | <li>DeepSpeed</li> | <li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | OPT | | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-<<<<<<< HEAD
 | Llama 2 / CodeLlama / Llama 3 / Llama Guard | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
-=======
-| Llama 2 / CodeLlama / Llama 3 | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li> |
->>>>>>> 455d728 (Update README.md and index.mdx.)
 | StableLM | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | Falcon | <li>LoRA</li> | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | CodeGen | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
  • | diff --git a/examples/object-segementation/README.md b/examples/object-segementation/README.md index 3204acab3..fa1496a54 100644 --- a/examples/object-segementation/README.md +++ b/examples/object-segementation/README.md @@ -13,7 +13,7 @@ limitations under the License. # Object Segmentation Examples -This directory contains two example script that demonstrates how to perform object segmentation on Gaudi with graph mode. +This directory contains two examples script that demonstrates how to perform object segmentation on Gaudi with graph mode. ## Single-HPU inference
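
For readers skimming the series, the snippet below is a condensed, unofficial sketch of the single-image SAM inference flow that the scripts added in these patches implement (Gaudi adaptation, HPU graph wrapping, bf16 autocast, and an explicit device synchronize). It is not an additional file in the PR; the model name, image URL, and point prompt simply mirror the defaults shown in the example READMEs above.

```python
# Condensed sketch of the HPU inference flow used by run_example_sam.py in this series.
import requests
import torch
import habana_frameworks.torch as ht
from PIL import Image
from transformers import AutoModel, AutoProcessor

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()  # patch transformers for Gaudi before loading the model

processor = AutoProcessor.from_pretrained("facebook/sam-vit-huge")
model = AutoModel.from_pretrained("facebook/sam-vit-huge")
model = ht.hpu.wrap_in_hpu_graph(model)  # equivalent to passing --use_hpu_graphs
model.to("hpu")

image = Image.open(
    requests.get(
        "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", stream=True
    ).raw
).convert("RGB")
points = [[[450, 600]]]  # same point prompt as the README example

with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
    inputs = processor(image, input_points=points, return_tensors="pt").to("hpu")
    outputs = model(**inputs)
    torch.hpu.synchronize()  # wait for the HPU before reading results
    print("iou score:", outputs.iou_scores)
```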