huggingface · cfgfung · Mar 14, 2024 · Mar 14, 2024 · Mar 18, 2024 · Mar 21, 2024
diff --git a/examples/image-classification/timm_fastvit/README.md b/examples/image-classification/timm_fastvit/README.md
@@ -0,0 +1,33 @@
+<!---
+Copyright 2024 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# FastViT Examples
+
+This directory contains an example script that demonstrates using FastViT with graph mode.
+
+## Single-HPU inference
+
+```bash
+python3 run_example.py \
+ --model_name_or_path "timm/fastvit_t8.apple_in1k" \
+ --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \
+ --warmup 3 \
+ --n_iterations 20 \
+ --use_hpu_graphs \
+ --bf16 \
+ --print_result
+```
+Models that have been validated:
+ - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k)
+ - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k)
+ - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k)
diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/timm_fastvit/run_example.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k
+
+from transformers import AutoProcessor, CLIPSegForImageSegmentation
+from PIL import Image
+import requests
+import torch
+import habana_frameworks.torch as ht
+import habana_frameworks.torch.core as htcore
+import time
+import argparse
+from torchvision.utils import save_image
+import timm
+
+from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ "--model_name_or_path",
+ default="timm/fastvit_t8.apple_in1k",
+ type=str,
+ help="Path of the pre-trained model",
+ )
+ parser.add_argument(
+ "--image_path",
+ default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png",
+ type=str,
+ help='Path of the input image. Should be a single string (eg: --image_path "URL")',
+ )
+ parser.add_argument(
+ "--use_hpu_graphs",
+ action="store_true",
+ help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.",
+ )
+ parser.add_argument(
+ "--bf16",
+ action="store_true",
+ help="Whether to use bf16 precision for classification.",
+ )
+ parser.add_argument(
+ "--print_result",
+ action="store_true",
+ help="Whether to print the classification results.",
+ )
+ parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
+ parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.")
+
+ args = parser.parse_args()
+
+ adapt_transformers_to_gaudi()
+
+
+ model = timm.create_model(args.model_name_or_path, pretrained=True)
+ model.to('hpu')
+ model = model.eval()
+ data_config = timm.data.resolve_model_data_config(model)
+ transforms = timm.data.create_transform(**data_config, is_training=False)
+
+ img = Image.open(requests.get(args.image_path, stream=True).raw)
+
+ if args.use_hpu_graphs:
+ model = ht.hpu.wrap_in_hpu_graph(model)
+
+ autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16)
+ model.to("hpu")
+
+ with torch.no_grad(), autocast:
+ for i in range(args.warmup):
+ inputs = transforms(img).unsqueeze(0).to('hpu')
+ outputs = model(inputs)
+ torch.hpu.synchronize()
+
+ total_model_time = 0
+ for i in range(args.n_iterations):
+ inputs = transforms(img).unsqueeze(0).to('hpu')
+ model_start_time = time.time()
+ outputs = model(inputs)
+ torch.hpu.synchronize()
+ model_end_time = time.time()
+ total_model_time = total_model_time + (model_end_time - model_start_time)
+
+ if args.print_result:
+ top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5)
+ print("top5_class_indices: " + str(top5_class_indices))
+
+ print("n_iterations: " + str(args.n_iterations))
+ print("Total latency (ms): " + str(total_model_time*1000))
+ print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations))
diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md
@@ -0,0 +1,32 @@
+<!---
+Copyright 2024 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# ClipSeg Examples
+
+This directory contains an example script that demonstrates using ClipSeg with graph mode.
+
+## Single-HPU inference
+
+```bash
+python3 run_example.py \
+ --model_name_or_path "CIDAS/clipseg-rd64-refined" \
+ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \
+ --prompt "cat, remote, blanket" \
+ --warmup 3 \
+ --n_iterations 20 \
+ --use_hpu_graphs \
+ --bf16 \
+ --print_result
+```
+Models that have been validated:
+ - [CIDAS/clipseg-rd64-refined](https://huggingface.co/CIDAS/clipseg-rd64-refined)
diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg
+
+from transformers import AutoProcessor, CLIPSegForImageSegmentation
+from PIL import Image
+import requests
+import torch
+import habana_frameworks.torch as ht
+import habana_frameworks.torch.core as htcore
+import time
+import argparse
+from torchvision.utils import save_image
+
+from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ "--model_name_or_path",
+ default="CIDAS/clipseg-rd64-refined",
+ type=str,
+ help="Path of the pre-trained model",
+ )
+ parser.add_argument(
+ "--image_path",
+ default="http://images.cocodataset.org/val2017/000000039769.jpg",
+ type=str,
+ help='Path of the input image. Should be a single string (eg: --image_path "URL")',
+ )
+ parser.add_argument(
+ "--prompt",
+ default="a cat, a remote, a blanket",
+ type=str,
+ help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")',
+ )
+ parser.add_argument(
+ "--use_hpu_graphs",
+ action="store_true",
+ help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.",
+ )
+ parser.add_argument(
+ "--bf16",
+ action="store_true",
+ help="Whether to use bf16 precision for classification.",
+ )
+ parser.add_argument(
+ "--print_result",
+ action="store_true",
+ help="Whether to print the classification results.",
+ )
+ parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.")
+ parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.")
+
+ args = parser.parse_args()
+
+ adapt_transformers_to_gaudi()
+
+ processor = AutoProcessor.from_pretrained(args.model_name_or_path)
+ model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path)
+
+ image = Image.open(requests.get(args.image_path, stream=True).raw)
+ texts = []
+ for text in args.prompt.split(','):
+ texts.append(text)
+
+ if args.use_hpu_graphs:
+ model = ht.hpu.wrap_in_hpu_graph(model)
+
+ autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16)
+ model.to("hpu")
+
+ with torch.no_grad(), autocast:
+ for i in range(args.warmup):
+ inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu")
+ outputs = model(**inputs)
+ torch.hpu.synchronize()
+
+ total_model_time = 0
+ for i in range(args.n_iterations):
+ inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu")
+ model_start_time = time.time()
+ outputs = model(**inputs)
+ torch.hpu.synchronize()
+ model_end_time = time.time()
+ total_model_time = total_model_time + (model_end_time - model_start_time)
+
+ if args.print_result:
+ if (i == 0): # generate/output once only
+ logits = outputs.logits
+ for j in range(logits.shape[0]):
+ threshold = 0.5
+ segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0)
+ segmented_image = segmented_image.to(torch.float32)
+ save_image(segmented_image, 'segmented' + texts[j] + '.png')
+ print('Segmented images are generated.')
+
+ print("n_iterations: " + str(args.n_iterations))
+ print("Total latency (ms): " + str(total_model_time*1000))
+ print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations))
diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md
@@ -0,0 +1,33 @@
+<!---
+Copyright 2024 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Segment Anything Model (SAM) Examples
+
+This directory contains an example script that demonstrates using SAM with graph mode.
+
+## Single-HPU inference
+
+```bash
+python3 run_example.py \
+ --model_name_or_path "facebook/sam-vit-huge" \
+ --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \
+ --point_prompt "450,600" \
+ --warmup 3 \
+ --n_iterations 20 \
+ --use_hpu_graphs \
+ --bf16 \
+ --print_result
+```
+Models that have been validated:
+ - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base)
+ - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge)