From 53098593bf6035b397b7129b41f9c97a509c94e8 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 17:53:19 +0000 Subject: [PATCH 01/13] Add an example of object-segmentation (ClipSeg) using graph mode. --- .../object-segementation/ClipSeg/README.md | 32 ++++++ .../ClipSeg/run_example.py | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 examples/object-segementation/ClipSeg/README.md create mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md new file mode 100644 index 000000000..f476a5a4e --- /dev/null +++ b/examples/object-segementation/ClipSeg/README.md @@ -0,0 +1,32 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using ClipSeg with graph mode. + +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "CIDAS/clipseg-rd64-refined" \ + --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ + --prompt "a cat, a remote, a blanket" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py new file mode 100644 index 000000000..d4aa3efae --- /dev/null +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="CIDAS/clipseg-rd64-refined", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="http://images.cocodataset.org/val2017/000000039769.jpg", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--prompt", + default="a cat, a remote, a blanket", + type=str, + help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw) + texts = [] + for text in args.prompt.split(','): + texts.append(text) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + logits = outputs.logits + print(logits.shape) + print("Logits: " + str(logits)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 80ca06bafe948d38265cf0118dfb0603d6c1e85a Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 18:44:43 +0000 Subject: [PATCH 02/13] Updated readme and added codes for generating segmented images. 
--- examples/object-segementation/ClipSeg/README.md | 2 +- examples/object-segementation/ClipSeg/run_example.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md index f476a5a4e..d8c76272a 100644 --- a/examples/object-segementation/ClipSeg/README.md +++ b/examples/object-segementation/ClipSeg/README.md @@ -21,7 +21,7 @@ This directory contains an example script that demonstrates using ClipSeg with g python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "a cat, a remote, a blanket" \ + --prompt "cat, remote, blanket" \ --warmup 3 \ --n_iterations 20 \ --use_hpu_graphs \ diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py index d4aa3efae..a10b8fa9d 100644 --- a/examples/object-segementation/ClipSeg/run_example.py +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -23,6 +23,7 @@ import habana_frameworks.torch.core as htcore import time import argparse +from torchvision.utils import save_image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -99,9 +100,14 @@ total_model_time = total_model_time + (model_end_time - model_start_time) if args.print_result: - logits = outputs.logits - print(logits.shape) - print("Logits: " + str(logits)) + if (i == 0): # generate/output once only + logits = outputs.logits + for j in range(logits.shape[0]): + threshold = 0.5 + segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) + segmented_image = segmented_image.to(torch.float32) + save_image(segmented_image, 'segmented' + texts[j] + '.png') + print('Segmented images are generated.') print("n_iterations: " + str(args.n_iterations)) print("Total latency (ms): " + str(total_model_time*1000)) From b99dfdbdc0437d76841f1c4f0ac0e0f9a15b1fc8 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 18 Mar 2024 18:08:06 +0000 Subject: [PATCH 03/13] Added an example code of SAM model. --- .../SegmentAnythingModel/README.md | 33 ++++++ .../SegmentAnythingModel/run_example.py | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 examples/object-segementation/SegmentAnythingModel/README.md create mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md new file mode 100644 index 000000000..8a0f24300 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -0,0 +1,33 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using SAM with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py new file mode 100644 index 000000000..592f7f429 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/run_example.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/facebook/sam-vit-base + +from transformers import SamModel, SamProcessor +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="facebook/sam-vit-huge", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--point_prompt", + default="450, 600", + type=str, + help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to save the segmentation result.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = SamProcessor.from_pretrained(args.model_name_or_path) + model = SamModel.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") + points = [] + for text in args.point_prompt.split(','): + points.append(int(text)) + points = [[points]] + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + if (i == 0): # generate/output once only + iou = outputs.iou_scores + print("iou score: " + str(iou)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 6166bdff5c87c197f554efddca363158b46a266f Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 21 Mar 2024 17:51:56 +0000 Subject: [PATCH 04/13] Add an example of TIMM/FastViT. --- .../timm_fastvit/README.md | 33 ++++++ .../timm_fastvit/run_example.py | 104 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 examples/image-classification/timm_fastvit/README.md create mode 100644 examples/image-classification/timm_fastvit/run_example.py diff --git a/examples/image-classification/timm_fastvit/README.md b/examples/image-classification/timm_fastvit/README.md new file mode 100644 index 000000000..65221adaa --- /dev/null +++ b/examples/image-classification/timm_fastvit/README.md @@ -0,0 +1,33 @@ + + +# FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/timm_fastvit/run_example.py new file mode 100644 index 000000000..2a2e55d05 --- /dev/null +++ b/examples/image-classification/timm_fastvit/run_example.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from torchvision.utils import save_image +import timm + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="timm/fastvit_t8.apple_in1k", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + + model = timm.create_model(args.model_name_or_path, pretrained=True) + model.to('hpu') + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + transforms = timm.data.create_transform(**data_config, is_training=False) + + img = Image.open(requests.get(args.image_path, stream=True).raw) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = transforms(img).unsqueeze(0).to('hpu') + outputs = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = transforms(img).unsqueeze(0).to('hpu') + model_start_time = time.time() + outputs = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) + print("top5_class_indices: " + str(top5_class_indices)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 388c02bdf72e0c6fa1a7eabb65bef74cdadda40a Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 21:00:47 +0000 Subject: [PATCH 05/13] Removed unrelated files. --- .../object-segementation/ClipSeg/README.md | 32 ----- .../ClipSeg/run_example.py | 114 ------------------ .../SegmentAnythingModel/README.md | 33 ----- .../SegmentAnythingModel/run_example.py | 109 ----------------- 4 files changed, 288 deletions(-) delete mode 100644 examples/object-segementation/ClipSeg/README.md delete mode 100644 examples/object-segementation/ClipSeg/run_example.py delete mode 100644 examples/object-segementation/SegmentAnythingModel/README.md delete mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md deleted file mode 100644 index d8c76272a..000000000 --- a/examples/object-segementation/ClipSeg/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using ClipSeg with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "CIDAS/clipseg-rd64-refined" \ - --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "cat, remote, blanket" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py deleted file mode 100644 index a10b8fa9d..000000000 --- a/examples/object-segementation/ClipSeg/run_example.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg - -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from torchvision.utils import save_image - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="CIDAS/clipseg-rd64-refined", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="http://images.cocodataset.org/val2017/000000039769.jpg", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--prompt", - default="a cat, a remote, a blanket", - type=str, - help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to print the classification results.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = AutoProcessor.from_pretrained(args.model_name_or_path) - model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw) - texts = [] - for text in args.prompt.split(','): - texts.append(text) - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - logits = outputs.logits - for j in range(logits.shape[0]): - threshold = 0.5 - segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) - segmented_image = segmented_image.to(torch.float32) - save_image(segmented_image, 'segmented' + texts[j] + '.png') - print('Segmented images are generated.') - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md deleted file mode 100644 index 8a0f24300..000000000 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using SAM with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "facebook/sam-vit-huge" \ - --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ - --point_prompt "450,600" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) - - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py deleted file mode 100644 index 592f7f429..000000000 --- a/examples/object-segementation/SegmentAnythingModel/run_example.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/facebook/sam-vit-base - -from transformers import SamModel, SamProcessor -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="facebook/sam-vit-huge", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--point_prompt", - default="450, 600", - type=str, - help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to save the segmentation result.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = SamProcessor.from_pretrained(args.model_name_or_path) - model = SamModel.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") - points = [] - for text in args.point_prompt.split(','): - points.append(int(text)) - points = [[points]] - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - iou = outputs.iou_scores - print("iou score: " + str(iou)) - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From f14d4a4433b343c7b367af14d128e53a8d6d53e6 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Wed, 15 May 2024 17:34:19 +0000 Subject: [PATCH 06/13] Added Readme, readme.idx etc. Aligned the style. --- README.md | 85 ++++++++++++------- docs/source/index.mdx | 37 +++++--- examples/image-classification/README.md | 23 ++++- .../image-classification/requirements.txt | 1 + .../run_example.py => run_timm_example.py} | 28 +++--- .../timm_fastvit/README.md | 33 ------- 6 files changed, 118 insertions(+), 89 deletions(-) rename examples/image-classification/{timm_fastvit/run_example.py => run_timm_example.py} (88%) delete mode 100644 examples/image-classification/timm_fastvit/README.md diff --git a/README.md b/README.md index 09a80082d..f3da039fe 100644 --- a/README.md +++ b/README.md @@ -14,21 +14,26 @@ See the License for the specific language governing permissions and limitations under the License. --> -![](https://github.com/huggingface/optimum-habana/blob/main/readme_logo.png) + + + + + + -# Optimum Habana -๐Ÿค— Optimum Habana is the interface between the ๐Ÿค— Transformers and Diffusers libraries and [Habana's Gaudi processor (HPU)](https://docs.habana.ai/en/latest/index.html). +# Optimum for Intelยฎ Gaudiยฎ Accelerators + +Optimum for Intel Gaudi - a.k.a. `optimum-habana` - is the interface between the Transformers and Diffusers libraries and [Intel Gaudi AI Accelerators (HPU)](https://docs.habana.ai/en/latest/index.html). 
It provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks. -The list of officially validated models and tasks is available [here](https://github.com/huggingface/optimum-habana#validated-models). Users can try other models and tasks with only few changes. +The list of officially validated models and tasks is available [here](https://github.com/huggingface/optimum-habana#validated-models). Users can try other of the thousands of Hugging Face models on Intel Gaudi accelerators and tasks with only few changes. -## What is a Habana Processing Unit (HPU)? +## What are Intel Gaudi AI Accelerators (HPUs)? HPUs offer fast model training and inference as well as a great price-performance ratio. -Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this article benchmarking Habana Gaudi2 versus Nvidia A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. -If you are not familiar with HPUs and would like to know more about them, we recommend you take a look at [our conceptual guide](https://huggingface.co/docs/optimum/habana/concept_guides/hpu). +Check out [this blog post about BLOOM inference](https://huggingface.co/blog/habana-gaudi-2-bloom) and [this post benchmarking Intel Gaudi 2 and NVIDIA A100 GPUs for BridgeTower training](https://huggingface.co/blog/bridgetower) for concrete examples. ## Install the library and get example scripts @@ -45,13 +50,13 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up To use the example associated with the latest stable release, run: > ``` > git clone https://github.com/huggingface/optimum-habana -> cd optimum-habana && git checkout v1.10.2 +> cd optimum-habana && git checkout v1.11.1 > ``` -> with `v1.10.2` the version number of this release. +> with `v1.11.1` the version number of this release. ### Option 2: Use the latest main branch under development -Optimum Habana is a fast-moving project, and you may want to install it from source and get the latest scripts : +Optimum for Intel Gaudi is a fast-moving project, and you may want to install it from source and get the latest scripts : ```bash pip install git+https://github.com/huggingface/optimum-habana.git @@ -62,7 +67,7 @@ git clone https://github.com/huggingface/optimum-habana To use DeepSpeed on HPUs, you also need to run the following command: >```bash ->pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 +>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.0 >``` To install the requirements for every example: @@ -76,7 +81,7 @@ To install the requirements for every example: ### Quick Start -๐Ÿค— Optimum Habana was designed with one goal in mind: **to make training and inference straightforward for any ๐Ÿค— Transformers and ๐Ÿค— Diffusers user while leveraging the complete power of Gaudi processors**. +Optimum for Intel Gaudi was designed with one goal in mind: **to make training and inference straightforward for Transformers and Diffusers users, while fully leveraging the power of Intel Gaudi AI Accelerators**. #### Transformers Interface @@ -84,7 +89,7 @@ There are two main classes one needs to know: - [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer): the trainer class that takes care of compiling and distributing the model to run on HPUs, and performing training and evaluation. 
- [GaudiConfig](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config): the class that enables to configure Habana Mixed Precision and to decide whether optimized operators and optimizers should be used or not. -The [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer) is very similar to the [๐Ÿค— Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer), and adapting a script using the Trainer to make it work with Gaudi will mostly consist in simply swapping the `Trainer` class for the `GaudiTrainer` one. +The [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer) is very similar to the [Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer), and adapting a script using the Trainer to make it work with Intel Gaudi accelerators will mostly consist in simply swapping the `Trainer` class for the `GaudiTrainer` one. That's how most of the [example scripts](https://github.com/huggingface/optimum-habana/tree/main/examples) were adapted from their [original counterparts](https://github.com/huggingface/transformers/tree/main/examples/pytorch). Here is an example: @@ -115,12 +120,12 @@ Here is an example: ) ``` -where `gaudi_config_name` is the name of a model from the [Hub](https://huggingface.co/Habana) (Gaudi configurations are stored in model repositories) or a path to a local Gaudi configuration file (you can see [here](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config) how to write your own). +where `gaudi_config_name` is the name of a model from the [Hub](https://huggingface.co/Habana) (Intel Gaudi configurations are stored in model repositories) or a path to a local Intel Gaudi configuration file (you can see [here](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config) how to write your own). #### Diffusers Interface -You can generate images from prompts using Stable Diffusion on Gaudi using the [`GaudiStableDiffusionPipeline`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline) class and the [`GaudiDDIMScheduler`] which have been both optimized for HPUs. Here is how to use them and the differences with the ๐Ÿค— Diffusers library: +You can generate images from prompts using Stable Diffusion on Intel Gaudi using the [`GaudiStableDiffusionPipeline`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline) class and the [`GaudiDDIMScheduler`] which have been both optimized for HPUs. Here is how to use them and the differences with the Diffusers library: ```diff - from diffusers import DDIMScheduler, StableDiffusionPipeline @@ -151,12 +156,12 @@ outputs = generator( ### Documentation -Check out [the documentation of Optimum Habana](https://huggingface.co/docs/optimum/habana/index) for more advanced usage. +Check out [the documentation of Optimum for Intel Gaudi](https://huggingface.co/docs/optimum/habana/index) for more advanced usage. ## Validated Models -The following model architectures, tasks and device distributions have been validated for ๐Ÿค— Optimum Habana: +The following model architectures, tasks and device distributions have been validated for Optimum for Intel Gaudi: > In the tables below, :heavy_check_mark: means single-card, multi-card and DeepSpeed have all been validated. @@ -175,20 +180,29 @@ The following model architectures, tasks and device distributions have been vali | GPT-J |
  • DeepSpeed
  • |
  • Single card
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | GPT-NeoX |
  • DeepSpeed
  • |
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | OPT | |
  • DeepSpeed
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Llama 2 / CodeLlama |
  • DeepSpeed
  • LoRA
  • | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Llama 2 / CodeLlama / Llama 3 / Llama Guard | :heavy_check_mark: | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)
  • [text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)
  • | | StableLM | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Falcon |
  • LoRA
  • | :heavy_check_mark: |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Falcon |
  • LoRA
  • | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | CodeGen | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | MPT | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Mistral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Phi | :heavy_check_mark: |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Mixtral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Persimmon | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Qwen2 |
  • Single card
  • |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Gemma | :heavy_check_mark: |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | T5 / Flan T5 | :heavy_check_mark: | :heavy_check_mark: |
  • [summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)
  • [translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)
  • | | BART | |
  • Single card
  • |
  • [summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)
  • [translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)
  • | | ViT | :heavy_check_mark: | :heavy_check_mark: |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | | Swin | :heavy_check_mark: | :heavy_check_mark: |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | | Wav2Vec2 | :heavy_check_mark: | :heavy_check_mark: |
  • [audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)
  • [speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)
  • | +| Whisper | :heavy_check_mark: | :heavy_check_mark: |
  • [speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)
  • | +| SpeechT5 | |
  • Single card
  • |
  • [text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)
  • | | CLIP | :heavy_check_mark: | :heavy_check_mark: |
  • [contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)
  • | | BridgeTower | :heavy_check_mark: | :heavy_check_mark: |
  • [contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)
  • | | ESMFold | |
  • Single card
  • |
  • [protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)
  • | +| Blip | |
  • Single card
  • |
  • [visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)
  • [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)
  • | +| OWLViT | |
  • Single card
  • |
  • [zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)
  • | @@ -198,36 +212,49 @@ The following model architectures, tasks and device distributions have been vali | Architecture | Training | Inference | Tasks | |------------------|:--------:|:--------------------:|:------| -| Stable Diffusion | |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +| Stable Diffusion |
  • [textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)
  • [ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)
  • |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +| Stable Diffusion XL |
  • [fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)
  • |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | | LDM3D | |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +- PyTorch Image Models/TIMM: + +
    + +| Architecture | Training | Inference | Tasks | +|---------------------|:--------:|:---------:|:------| +| FastViT | |
  • Single card
  • |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | + +
    + - TRL:
    -| Architecture | Training | Inference | Tasks | -|------------------|:--------:|:--------------------:|:------| -| Llama 2 | :heavy_check_mark: | |
  • [DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | -| Llama 2 | :heavy_check_mark: | |
  • [PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Architecture | Training | Inference | Tasks | +|------------------|:--------:|:--------------------:|:-----------------------------------------------------------------------------------------------| +| Llama 2 | :heavy_check_mark: | |
  • [DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Llama 2 | :heavy_check_mark: | |
  • [PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Stable Diffusion | :heavy_check_mark: | |
  • [DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • |
    -Other models and tasks supported by the ๐Ÿค— Transformers and ๐Ÿค— Diffusers library may also work. You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it) for using them with ๐Ÿค— Optimum Habana. Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the ๐Ÿค— Transformers library to make it work with ๐Ÿค— Optimum Habana. +Other models and tasks supported by the Transformers and Diffusers libraries may also work. You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it) for using them with Optimum for Intel Gaudi. In addition, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the Transformers library to make it work with Optimum for Intel Gaudi. If you find any issues while using those, please open an issue or a pull request. +After training your model, feel free to submit it to the Intel [leaderboard](https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard) which is designed to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel Hardwares. Models submitted to the leaderboard will be evaluated on the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness. ## Gaudi Setup -Please refer to Habana Gaudi's official [installation guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html). +Please refer to the Intel Gaudi AI Accelerator official [installation guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html). -> Tests should be run in a Docker container based on Habana Docker images. +> Tests should be run in a Docker container based on Intel Gaudi Docker images. > -> The current version has been validated for SynapseAI 1.14. +> The current version has been validated for SynapseAI 1.15. ## Development -Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions. +Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions. \ No newline at end of file diff --git a/docs/source/index.mdx b/docs/source/index.mdx index ff85c8304..8c23bbb57 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -15,17 +15,17 @@ limitations under the License. --> -# ๐Ÿค— Optimum Habana +# Optimum for Intel Gaudi -๐Ÿค— Optimum Habana is the interface between the ๐Ÿค— Transformers and ๐Ÿค— Diffusers libraries and [Habana's Gaudi processor (HPU)](https://docs.habana.ai/en/latest/index.html). +Optimum for Intel Gaudi is the interface between the Transformers and Diffusers libraries and [Intelยฎ Gaudiยฎ AI Accelerators (HPUs)](https://docs.habana.ai/en/latest/index.html). It provides a set of tools that enable easy model loading, training and inference on single- and multi-HPU settings for various downstream tasks as shown in the table below. HPUs offer fast model training and inference as well as a great price-performance ratio. 
-Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this article benchmarking Habana Gaudi2 versus Nvidia A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. +Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this post benchmarking Intel Gaudi 2 with NVIDIA A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. If you are not familiar with HPUs, we recommend you take a look at [our conceptual guide](./concept_guides/hpu). -The following model architectures, tasks and device distributions have been validated for ๐Ÿค— Optimum Habana: +The following model architectures, tasks and device distributions have been validated for Optimum for Intel Gaudi: @@ -47,28 +47,42 @@ In the tables below, โœ… means single-card, multi-card and DeepSpeed have all be | GPT-J |
  • DeepSpeed
  • |
  • Single card
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | GPT-NeoX |
  • DeepSpeed
  • |
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | OPT | |
  • DeepSpeed
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Llama 2 / CodeLlama |
  • DeepSpeed
  • LoRA
  • | โœ… |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Llama 2 / CodeLlama / Llama 3 / Llama Guard | โœ… | โœ… |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)
  • [text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)
  • | | StableLM | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Falcon |
  • LoRA
  • | โœ… |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | CodeGen | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | MPT | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Mistral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Phi | โœ… |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
 |
+| Mixtral | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Gemma | ✅ | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Qwen2 | <li>Single card</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Persimmon | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | T5 / Flan T5 | ✅ | ✅ | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
 | BART | | <li>Single card</li> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
 | ViT | ✅ | ✅ | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 | Swin | ✅ | ✅ | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 | Wav2Vec2 | ✅ | ✅ | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
+| Whisper | ✅ | ✅ | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
+| SpeechT5 | | <li>Single card</li> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)</li> |
 | CLIP | ✅ | ✅ | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
 | BridgeTower | ✅ | ✅ | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
 | ESMFold | | <li>Single card</li> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)</li> |
+| Blip | | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| OWLViT | | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
-- Diffusers
+- Diffusers:
-| Architecture | Training | Inference | Tasks |
-|------------------|:--------:|:---------:|:------|
-| Stable Diffusion | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| Stable Diffusion | <li>[textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)</li><li>[ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+- PyTorch Image Models/TIMM:
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 - TRL:
@@ -76,6 +90,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 |------------------|:--------:|:--------------------:|:------|
 | Llama 2 | ✅ | | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
 | Llama 2 | ✅ | | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
+| Stable Diffusion | ✅ | | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
 Other models and tasks supported by the 🤗 Transformers and 🤗 Diffusers library may also work.
@@ -102,4 +117,4 @@ Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/exa
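Every single-card inference entry in these tables follows the same Gaudi pattern used by the example scripts in this series: call `adapt_transformers_to_gaudi()`, load the model, move it to the `hpu` device, optionally wrap it in an HPU graph, and run under bf16 autocast. The sketch below illustrates that pattern; the ViT checkpoint and image URL are illustrative stand-ins rather than values taken from these patches.

```python
# Sketch of the shared single-HPU inference pattern used by the example scripts in this series.
# The ViT checkpoint and image URL below are only illustrations; any single-card model
# from the tables above should follow the same steps.
import habana_frameworks.torch as ht
import requests
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()  # patch Transformers models for Gaudi

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224")
model.to("hpu")
model = ht.hpu.wrap_in_hpu_graph(model)  # record the forward pass once, replay it on later calls

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
    inputs = processor(images=image, return_tensors="pt").to("hpu")
    logits = model(**inputs).logits
    torch.hpu.synchronize()

print(model.config.id2label[int(logits.argmax(-1))])
```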

    Technical descriptions of how the Habana classes and methods of 🤗 Optimum Habana work.

    - + \ No newline at end of file diff --git a/examples/image-classification/README.md b/examples/image-classification/README.md index f7a9cdd18..8d21f38e6 100644 --- a/examples/image-classification/README.md +++ b/examples/image-classification/README.md @@ -16,7 +16,7 @@ limitations under the License. # Image Classification Examples -This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). +This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). This directory also contains a script to demonstrate a single HPU inference for [PyTorch-Image-Models/TIMM](https://huggingface.co/docs/timm/index) ## Single-HPU training @@ -288,3 +288,24 @@ python run_image_classification.py \ --gaudi_config_name Habana/vit \ --dataloader_num_workers 1 \ --bf16 + +## TIMM/FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +### Single-HPU inference + +```bash +python3 run_timm_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt index 87694059f..7b0e43a8d 100644 --- a/examples/image-classification/requirements.txt +++ b/examples/image-classification/requirements.txt @@ -3,3 +3,4 @@ torchvision>=0.6.0 datasets>=2.14.0 evaluate scikit-learn +timm>=0.9.16 \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/run_timm_example.py similarity index 88% rename from examples/image-classification/timm_fastvit/run_example.py rename to examples/image-classification/run_timm_example.py index 2a2e55d05..6d96b0102 100644 --- a/examples/image-classification/timm_fastvit/run_example.py +++ b/examples/image-classification/run_timm_example.py @@ -15,19 +15,18 @@ # Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time import argparse -from torchvision.utils import save_image +import time + +import habana_frameworks.torch as ht +import requests import timm +import torch +from PIL import Image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -65,9 +64,8 @@ adapt_transformers_to_gaudi() - model = timm.create_model(args.model_name_or_path, pretrained=True) - model.to('hpu') + model.to("hpu") model = model.eval() data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) @@ -82,13 +80,13 @@ with torch.no_grad(), autocast: for i in range(args.warmup): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") outputs = model(inputs) torch.hpu.synchronize() total_model_time = 0 for i in range(args.n_iterations): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") model_start_time = time.time() outputs = model(inputs) torch.hpu.synchronize() @@ -97,8 +95,8 @@ if args.print_result: top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) - print("top5_class_indices: " + str(top5_class_indices)) + print("top5_class_indices: " + str(top5_class_indices.to("cpu").numpy())) print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file + print("Total latency (ms): " + str(total_model_time * 1000)) + print("Average latency (ms): " + str(total_model_time * 1000 / args.n_iterations)) diff --git a/examples/image-classification/timm_fastvit/README.md 
b/examples/image-classification/timm_fastvit/README.md deleted file mode 100644 index 65221adaa..000000000 --- a/examples/image-classification/timm_fastvit/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# FastViT Examples - -This directory contains an example script that demonstrates using FastViT with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "timm/fastvit_t8.apple_in1k" \ - --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) - - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) - - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file From 5386b17e3050fcb2c827723ef0ac5a36802adbcd Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 17:53:19 +0000 Subject: [PATCH 07/13] Add an example of object-segmentation (ClipSeg) using graph mode. --- .../object-segementation/ClipSeg/README.md | 32 ++++++ .../ClipSeg/run_example.py | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 examples/object-segementation/ClipSeg/README.md create mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md new file mode 100644 index 000000000..f476a5a4e --- /dev/null +++ b/examples/object-segementation/ClipSeg/README.md @@ -0,0 +1,32 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using ClipSeg with graph mode. + +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "CIDAS/clipseg-rd64-refined" \ + --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ + --prompt "a cat, a remote, a blanket" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py new file mode 100644 index 000000000..d4aa3efae --- /dev/null +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="CIDAS/clipseg-rd64-refined", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="http://images.cocodataset.org/val2017/000000039769.jpg", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--prompt", + default="a cat, a remote, a blanket", + type=str, + help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw) + texts = [] + for text in args.prompt.split(','): + texts.append(text) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + logits = outputs.logits + print(logits.shape) + print("Logits: " + str(logits)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 4318a09abd196ffaae887351b2146a58a96617d1 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 18:44:43 +0000 Subject: [PATCH 08/13] Updated readme and added codes for generating segmented images. 
--- examples/object-segementation/ClipSeg/README.md | 2 +- examples/object-segementation/ClipSeg/run_example.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md index f476a5a4e..d8c76272a 100644 --- a/examples/object-segementation/ClipSeg/README.md +++ b/examples/object-segementation/ClipSeg/README.md @@ -21,7 +21,7 @@ This directory contains an example script that demonstrates using ClipSeg with g python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "a cat, a remote, a blanket" \ + --prompt "cat, remote, blanket" \ --warmup 3 \ --n_iterations 20 \ --use_hpu_graphs \ diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py index d4aa3efae..a10b8fa9d 100644 --- a/examples/object-segementation/ClipSeg/run_example.py +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -23,6 +23,7 @@ import habana_frameworks.torch.core as htcore import time import argparse +from torchvision.utils import save_image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -99,9 +100,14 @@ total_model_time = total_model_time + (model_end_time - model_start_time) if args.print_result: - logits = outputs.logits - print(logits.shape) - print("Logits: " + str(logits)) + if (i == 0): # generate/output once only + logits = outputs.logits + for j in range(logits.shape[0]): + threshold = 0.5 + segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) + segmented_image = segmented_image.to(torch.float32) + save_image(segmented_image, 'segmented' + texts[j] + '.png') + print('Segmented images are generated.') print("n_iterations: " + str(args.n_iterations)) print("Total latency (ms): " + str(total_model_time*1000)) From b3c704317f6bb22cf039db58690ffe2433faf867 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 18 Mar 2024 18:08:06 +0000 Subject: [PATCH 09/13] Added an example code of SAM model. --- .../SegmentAnythingModel/README.md | 33 ++++++ .../SegmentAnythingModel/run_example.py | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 examples/object-segementation/SegmentAnythingModel/README.md create mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md new file mode 100644 index 000000000..8a0f24300 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -0,0 +1,33 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using SAM with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py new file mode 100644 index 000000000..592f7f429 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/run_example.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/facebook/sam-vit-base + +from transformers import SamModel, SamProcessor +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="facebook/sam-vit-huge", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--point_prompt", + default="450, 600", + type=str, + help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to save the segmentation result.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = SamProcessor.from_pretrained(args.model_name_or_path) + model = SamModel.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") + points = [] + for text in args.point_prompt.split(','): + points.append(int(text)) + points = [[points]] + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + if (i == 0): # generate/output once only + iou = outputs.iou_scores + print("iou score: " + str(iou)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 0693d4178390f4fc516c869c304f32da209467a6 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 21 Mar 2024 17:51:56 +0000 Subject: [PATCH 10/13] Add an example of TIMM/FastViT. --- .../timm_fastvit/README.md | 33 ++++++ .../timm_fastvit/run_example.py | 104 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 examples/image-classification/timm_fastvit/README.md create mode 100644 examples/image-classification/timm_fastvit/run_example.py diff --git a/examples/image-classification/timm_fastvit/README.md b/examples/image-classification/timm_fastvit/README.md new file mode 100644 index 000000000..65221adaa --- /dev/null +++ b/examples/image-classification/timm_fastvit/README.md @@ -0,0 +1,33 @@ + + +# FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/timm_fastvit/run_example.py new file mode 100644 index 000000000..2a2e55d05 --- /dev/null +++ b/examples/image-classification/timm_fastvit/run_example.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from torchvision.utils import save_image +import timm + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="timm/fastvit_t8.apple_in1k", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + + model = timm.create_model(args.model_name_or_path, pretrained=True) + model.to('hpu') + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + transforms = timm.data.create_transform(**data_config, is_training=False) + + img = Image.open(requests.get(args.image_path, stream=True).raw) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = transforms(img).unsqueeze(0).to('hpu') + outputs = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = transforms(img).unsqueeze(0).to('hpu') + model_start_time = time.time() + outputs = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) + print("top5_class_indices: " + str(top5_class_indices)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 4c496a322b9ae6535e641353ee2215071a9b8d57 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 21:00:47 +0000 Subject: [PATCH 11/13] Removed unrelated files. --- .../object-segementation/ClipSeg/README.md | 32 ----- .../ClipSeg/run_example.py | 114 ------------------ .../SegmentAnythingModel/README.md | 33 ----- .../SegmentAnythingModel/run_example.py | 109 ----------------- 4 files changed, 288 deletions(-) delete mode 100644 examples/object-segementation/ClipSeg/README.md delete mode 100644 examples/object-segementation/ClipSeg/run_example.py delete mode 100644 examples/object-segementation/SegmentAnythingModel/README.md delete mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md deleted file mode 100644 index d8c76272a..000000000 --- a/examples/object-segementation/ClipSeg/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using ClipSeg with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "CIDAS/clipseg-rd64-refined" \ - --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "cat, remote, blanket" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py deleted file mode 100644 index a10b8fa9d..000000000 --- a/examples/object-segementation/ClipSeg/run_example.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg - -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from torchvision.utils import save_image - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="CIDAS/clipseg-rd64-refined", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="http://images.cocodataset.org/val2017/000000039769.jpg", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--prompt", - default="a cat, a remote, a blanket", - type=str, - help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to print the classification results.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = AutoProcessor.from_pretrained(args.model_name_or_path) - model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw) - texts = [] - for text in args.prompt.split(','): - texts.append(text) - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - logits = outputs.logits - for j in range(logits.shape[0]): - threshold = 0.5 - segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) - segmented_image = segmented_image.to(torch.float32) - save_image(segmented_image, 'segmented' + texts[j] + '.png') - print('Segmented images are generated.') - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md deleted file mode 100644 index 8a0f24300..000000000 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using SAM with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "facebook/sam-vit-huge" \ - --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ - --point_prompt "450,600" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) - - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py deleted file mode 100644 index 592f7f429..000000000 --- a/examples/object-segementation/SegmentAnythingModel/run_example.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/facebook/sam-vit-base - -from transformers import SamModel, SamProcessor -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="facebook/sam-vit-huge", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--point_prompt", - default="450, 600", - type=str, - help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to save the segmentation result.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = SamProcessor.from_pretrained(args.model_name_or_path) - model = SamModel.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") - points = [] - for text in args.point_prompt.split(','): - points.append(int(text)) - points = [[points]] - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - iou = outputs.iou_scores - print("iou score: " + str(iou)) - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From bda2df2cbb188a81cdd21904d1cb7a788c9ab422 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Wed, 15 May 2024 17:34:19 +0000 Subject: [PATCH 12/13] Added Readme, readme.idx etc. Aligned the style. --- README.md | 12 ++++++- docs/source/index.mdx | 8 +++-- examples/image-classification/README.md | 23 ++++++++++++- .../image-classification/requirements.txt | 1 + .../run_example.py => run_timm_example.py} | 28 ++++++++-------- .../timm_fastvit/README.md | 33 ------------------- 6 files changed, 53 insertions(+), 52 deletions(-) rename examples/image-classification/{timm_fastvit/run_example.py => run_timm_example.py} (88%) delete mode 100644 examples/image-classification/timm_fastvit/README.md diff --git a/README.md b/README.md index f84db6484..f3da039fe 100644 --- a/README.md +++ b/README.md @@ -218,6 +218,16 @@ The following model architectures, tasks and device distributions have been vali +- PyTorch Image Models/TIMM: + +
+
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
+
+
+
 - TRL:
@@ -247,4 +257,4 @@ Please refer to the Intel Gaudi AI Accelerator official [installation guide](htt
 
 ## Development
 
-Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
+Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
\ No newline at end of file
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index 63ec3279c..8c23bbb57 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -71,7 +71,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | Blip | | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | OWLViT | | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 
-- Diffusers
+- Diffusers:
 
 | Architecture | Training | Inference | Tasks |
 |---------------------|:--------:|:---------:|:------|
@@ -79,6 +79,10 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 
+- PyTorch Image Models/TIMM:
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 
 - TRL:
@@ -113,4 +117,4 @@ Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/exa

    Technical descriptions of how the Habana classes and methods of 🤗 Optimum Habana work.

    - + \ No newline at end of file diff --git a/examples/image-classification/README.md b/examples/image-classification/README.md index f7a9cdd18..8d21f38e6 100644 --- a/examples/image-classification/README.md +++ b/examples/image-classification/README.md @@ -16,7 +16,7 @@ limitations under the License. # Image Classification Examples -This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). +This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). This directory also contains a script to demonstrate a single HPU inference for [PyTorch-Image-Models/TIMM](https://huggingface.co/docs/timm/index) ## Single-HPU training @@ -288,3 +288,24 @@ python run_image_classification.py \ --gaudi_config_name Habana/vit \ --dataloader_num_workers 1 \ --bf16 + +## TIMM/FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +### Single-HPU inference + +```bash +python3 run_timm_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt index 87694059f..7b0e43a8d 100644 --- a/examples/image-classification/requirements.txt +++ b/examples/image-classification/requirements.txt @@ -3,3 +3,4 @@ torchvision>=0.6.0 datasets>=2.14.0 evaluate scikit-learn +timm>=0.9.16 \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/run_timm_example.py similarity index 88% rename from examples/image-classification/timm_fastvit/run_example.py rename to examples/image-classification/run_timm_example.py index 2a2e55d05..6d96b0102 100644 --- a/examples/image-classification/timm_fastvit/run_example.py +++ b/examples/image-classification/run_timm_example.py @@ -15,19 +15,18 @@ # Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time import argparse -from torchvision.utils import save_image +import time + +import habana_frameworks.torch as ht +import requests import timm +import torch +from PIL import Image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -65,9 +64,8 @@ adapt_transformers_to_gaudi() - model = timm.create_model(args.model_name_or_path, pretrained=True) - model.to('hpu') + model.to("hpu") model = model.eval() data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) @@ -82,13 +80,13 @@ with torch.no_grad(), autocast: for i in range(args.warmup): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") outputs = model(inputs) torch.hpu.synchronize() total_model_time = 0 for i in range(args.n_iterations): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") model_start_time = time.time() outputs = model(inputs) torch.hpu.synchronize() @@ -97,8 +95,8 @@ if args.print_result: top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) - print("top5_class_indices: " + str(top5_class_indices)) + print("top5_class_indices: " + str(top5_class_indices.to("cpu").numpy())) print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file + print("Total latency (ms): " + str(total_model_time * 1000)) + print("Average latency (ms): " + str(total_model_time * 1000 / args.n_iterations)) diff --git a/examples/image-classification/timm_fastvit/README.md 
b/examples/image-classification/timm_fastvit/README.md deleted file mode 100644 index 65221adaa..000000000 --- a/examples/image-classification/timm_fastvit/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# FastViT Examples - -This directory contains an example script that demonstrates using FastViT with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "timm/fastvit_t8.apple_in1k" \ - --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) - - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) - - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file From 3b76c1b2abdeea78e64c00ccbee7de9ac7a8ebc7 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Tue, 25 Jun 2024 17:31:42 +0000 Subject: [PATCH 13/13] Rebase to the latest main branch and added test cases. --- Makefile | 5 ++ tests/test_image_classification.py | 120 +++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 tests/test_image_classification.py diff --git a/Makefile b/Makefile index 6e87a399a..a43f2a55a 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,11 @@ fast_tests_diffusers: python -m pip install .[tests] python -m pytest tests/test_diffusers.py +# Run single-card non-regression tests on image classification models +fast_tests_image_classifications: + pip install timm + python -m pytest tests/test_image_classification.py + # Run single-card non-regression tests slow_tests_1x: test_installs python -m pytest tests/test_examples.py -v -s -k "single_card" diff --git a/tests/test_image_classification.py b/tests/test_image_classification.py new file mode 100644 index 000000000..6e59b7ac4 --- /dev/null +++ b/tests/test_image_classification.py @@ -0,0 +1,120 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +from unittest import TestCase + +import habana_frameworks.torch as ht +import numpy as np +import requests +import timm +import torch +from PIL import Image + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + + +adapt_transformers_to_gaudi() + +# For Gaudi 2 +LATENCY_FastViT_BF16_GRAPH_BASELINE = 2.5270626640319824 + + +class GaudiFastViTTester(TestCase): + """ + Tests for FastViT model + """ + + def prepare_model_and_processor(self): + model = timm.create_model("timm/fastvit_t8.apple_in1k", pretrained=True) + model.to("hpu") + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + processor = timm.data.create_transform(**data_config, is_training=False) + return model, processor + + def prepare_data(self): + url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" + image = Image.open(requests.get(url, stream=True).raw) + return image + + def test_inference_default(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_inference_autocast(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + + with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_inference_hpu_graphs(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + + model = ht.hpu.wrap_in_hpu_graph(model) # Apply graph + + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_no_latency_regression_autocast(self): + warmup = 3 + iterations = 20 + + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + + model = ht.hpu.wrap_in_hpu_graph(model) + + with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): + for i in range(warmup): + inputs = processor(image).unsqueeze(0).to("hpu") + _ = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(iterations): + inputs = 
processor(image).unsqueeze(0).to("hpu") + model_start_time = time.time() + _ = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + latency = total_model_time * 1000 / iterations # in terms of ms + self.assertLessEqual(latency, 1.05 * LATENCY_FastViT_BF16_GRAPH_BASELINE)
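
Taken together, the FastViT additions in this series reduce to the following single-HPU inference flow. This is a condensed sketch, assuming a Gaudi host with `timm`, `habana_frameworks`, and `optimum-habana` installed; the full script is `run_timm_example.py` above.

```python
# Condensed sketch of the single-HPU TIMM/FastViT inference flow from run_timm_example.py.
# Assumes a Gaudi host with timm, habana_frameworks and optimum-habana installed.
import habana_frameworks.torch as ht
import requests
import timm
import torch
from PIL import Image

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()

# Pretrained FastViT and its matching preprocessing transform.
model = timm.create_model("timm/fastvit_t8.apple_in1k", pretrained=True).to("hpu").eval()
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

# HPU graphs record the forward pass once and replay it on later calls.
model = ht.hpu.wrap_in_hpu_graph(model)

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
image = Image.open(requests.get(url, stream=True).raw)

with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
    inputs = transforms(image).unsqueeze(0).to("hpu")
    outputs = model(inputs)
    torch.hpu.synchronize()

top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5)
print("top5_class_indices:", top5_class_indices.to("cpu").numpy())
```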