From 53098593bf6035b397b7129b41f9c97a509c94e8 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 17:53:19 +0000 Subject: [PATCH 01/13] Add an example of object-segmentation (ClipSeg) using graph mode. --- .../object-segementation/ClipSeg/README.md | 32 ++++++ .../ClipSeg/run_example.py | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 examples/object-segementation/ClipSeg/README.md create mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md new file mode 100644 index 000000000..f476a5a4e --- /dev/null +++ b/examples/object-segementation/ClipSeg/README.md @@ -0,0 +1,32 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using ClipSeg with graph mode. + +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "CIDAS/clipseg-rd64-refined" \ + --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ + --prompt "a cat, a remote, a blanket" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py new file mode 100644 index 000000000..d4aa3efae --- /dev/null +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="CIDAS/clipseg-rd64-refined", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="http://images.cocodataset.org/val2017/000000039769.jpg", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--prompt", + default="a cat, a remote, a blanket", + type=str, + help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw) + texts = [] + for text in args.prompt.split(','): + texts.append(text) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + logits = outputs.logits + print(logits.shape) + print("Logits: " + str(logits)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 80ca06bafe948d38265cf0118dfb0603d6c1e85a Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 18:44:43 +0000 Subject: [PATCH 02/13] Updated readme and added codes for generating segmented images. 
--- examples/object-segementation/ClipSeg/README.md | 2 +- examples/object-segementation/ClipSeg/run_example.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md index f476a5a4e..d8c76272a 100644 --- a/examples/object-segementation/ClipSeg/README.md +++ b/examples/object-segementation/ClipSeg/README.md @@ -21,7 +21,7 @@ This directory contains an example script that demonstrates using ClipSeg with g python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "a cat, a remote, a blanket" \ + --prompt "cat, remote, blanket" \ --warmup 3 \ --n_iterations 20 \ --use_hpu_graphs \ diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py index d4aa3efae..a10b8fa9d 100644 --- a/examples/object-segementation/ClipSeg/run_example.py +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -23,6 +23,7 @@ import habana_frameworks.torch.core as htcore import time import argparse +from torchvision.utils import save_image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -99,9 +100,14 @@ total_model_time = total_model_time + (model_end_time - model_start_time) if args.print_result: - logits = outputs.logits - print(logits.shape) - print("Logits: " + str(logits)) + if (i == 0): # generate/output once only + logits = outputs.logits + for j in range(logits.shape[0]): + threshold = 0.5 + segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) + segmented_image = segmented_image.to(torch.float32) + save_image(segmented_image, 'segmented' + texts[j] + '.png') + print('Segmented images are generated.') print("n_iterations: " + str(args.n_iterations)) print("Total latency (ms): " + str(total_model_time*1000)) From b99dfdbdc0437d76841f1c4f0ac0e0f9a15b1fc8 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 18 Mar 2024 18:08:06 +0000 Subject: [PATCH 03/13] Added an example code of SAM model. --- .../SegmentAnythingModel/README.md | 33 ++++++ .../SegmentAnythingModel/run_example.py | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 examples/object-segementation/SegmentAnythingModel/README.md create mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md new file mode 100644 index 000000000..8a0f24300 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -0,0 +1,33 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using SAM with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py new file mode 100644 index 000000000..592f7f429 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/run_example.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/facebook/sam-vit-base + +from transformers import SamModel, SamProcessor +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="facebook/sam-vit-huge", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--point_prompt", + default="450, 600", + type=str, + help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to save the segmentation result.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = SamProcessor.from_pretrained(args.model_name_or_path) + model = SamModel.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") + points = [] + for text in args.point_prompt.split(','): + points.append(int(text)) + points = [[points]] + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + if (i == 0): # generate/output once only + iou = outputs.iou_scores + print("iou score: " + str(iou)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 6166bdff5c87c197f554efddca363158b46a266f Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 21 Mar 2024 17:51:56 +0000 Subject: [PATCH 04/13] Add an example of TIMM/FastViT. --- .../timm_fastvit/README.md | 33 ++++++ .../timm_fastvit/run_example.py | 104 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 examples/image-classification/timm_fastvit/README.md create mode 100644 examples/image-classification/timm_fastvit/run_example.py diff --git a/examples/image-classification/timm_fastvit/README.md b/examples/image-classification/timm_fastvit/README.md new file mode 100644 index 000000000..65221adaa --- /dev/null +++ b/examples/image-classification/timm_fastvit/README.md @@ -0,0 +1,33 @@ + + +# FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/timm_fastvit/run_example.py new file mode 100644 index 000000000..2a2e55d05 --- /dev/null +++ b/examples/image-classification/timm_fastvit/run_example.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from torchvision.utils import save_image +import timm + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="timm/fastvit_t8.apple_in1k", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + + model = timm.create_model(args.model_name_or_path, pretrained=True) + model.to('hpu') + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + transforms = timm.data.create_transform(**data_config, is_training=False) + + img = Image.open(requests.get(args.image_path, stream=True).raw) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = transforms(img).unsqueeze(0).to('hpu') + outputs = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = transforms(img).unsqueeze(0).to('hpu') + model_start_time = time.time() + outputs = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) + print("top5_class_indices: " + str(top5_class_indices)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 388c02bdf72e0c6fa1a7eabb65bef74cdadda40a Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 21:00:47 +0000 Subject: [PATCH 05/13] Removed unrelated files. --- .../object-segementation/ClipSeg/README.md | 32 ----- .../ClipSeg/run_example.py | 114 ------------------ .../SegmentAnythingModel/README.md | 33 ----- .../SegmentAnythingModel/run_example.py | 109 ----------------- 4 files changed, 288 deletions(-) delete mode 100644 examples/object-segementation/ClipSeg/README.md delete mode 100644 examples/object-segementation/ClipSeg/run_example.py delete mode 100644 examples/object-segementation/SegmentAnythingModel/README.md delete mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md deleted file mode 100644 index d8c76272a..000000000 --- a/examples/object-segementation/ClipSeg/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using ClipSeg with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "CIDAS/clipseg-rd64-refined" \ - --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "cat, remote, blanket" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py deleted file mode 100644 index a10b8fa9d..000000000 --- a/examples/object-segementation/ClipSeg/run_example.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg - -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from torchvision.utils import save_image - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="CIDAS/clipseg-rd64-refined", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="http://images.cocodataset.org/val2017/000000039769.jpg", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--prompt", - default="a cat, a remote, a blanket", - type=str, - help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to print the classification results.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = AutoProcessor.from_pretrained(args.model_name_or_path) - model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw) - texts = [] - for text in args.prompt.split(','): - texts.append(text) - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - logits = outputs.logits - for j in range(logits.shape[0]): - threshold = 0.5 - segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) - segmented_image = segmented_image.to(torch.float32) - save_image(segmented_image, 'segmented' + texts[j] + '.png') - print('Segmented images are generated.') - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md deleted file mode 100644 index 8a0f24300..000000000 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using SAM with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "facebook/sam-vit-huge" \ - --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ - --point_prompt "450,600" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) - - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py deleted file mode 100644 index 592f7f429..000000000 --- a/examples/object-segementation/SegmentAnythingModel/run_example.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/facebook/sam-vit-base - -from transformers import SamModel, SamProcessor -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="facebook/sam-vit-huge", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--point_prompt", - default="450, 600", - type=str, - help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to save the segmentation result.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = SamProcessor.from_pretrained(args.model_name_or_path) - model = SamModel.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") - points = [] - for text in args.point_prompt.split(','): - points.append(int(text)) - points = [[points]] - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - iou = outputs.iou_scores - print("iou score: " + str(iou)) - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From f14d4a4433b343c7b367af14d128e53a8d6d53e6 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Wed, 15 May 2024 17:34:19 +0000 Subject: [PATCH 06/13] Added Readme, readme.idx etc. Aligned the style. --- README.md | 85 ++++++++++++------- docs/source/index.mdx | 37 +++++--- examples/image-classification/README.md | 23 ++++- .../image-classification/requirements.txt | 1 + .../run_example.py => run_timm_example.py} | 28 +++--- .../timm_fastvit/README.md | 33 ------- 6 files changed, 118 insertions(+), 89 deletions(-) rename examples/image-classification/{timm_fastvit/run_example.py => run_timm_example.py} (88%) delete mode 100644 examples/image-classification/timm_fastvit/README.md diff --git a/README.md b/README.md index 09a80082d..f3da039fe 100644 --- a/README.md +++ b/README.md @@ -14,21 +14,26 @@ See the License for the specific language governing permissions and limitations under the License. --> -![](https://github.com/huggingface/optimum-habana/blob/main/readme_logo.png) + + + + + + -# Optimum Habana -๐Ÿค— Optimum Habana is the interface between the ๐Ÿค— Transformers and Diffusers libraries and [Habana's Gaudi processor (HPU)](https://docs.habana.ai/en/latest/index.html). +# Optimum for Intelยฎ Gaudiยฎ Accelerators + +Optimum for Intel Gaudi - a.k.a. `optimum-habana` - is the interface between the Transformers and Diffusers libraries and [Intel Gaudi AI Accelerators (HPU)](https://docs.habana.ai/en/latest/index.html). 
It provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks. -The list of officially validated models and tasks is available [here](https://github.com/huggingface/optimum-habana#validated-models). Users can try other models and tasks with only few changes. +The list of officially validated models and tasks is available [here](https://github.com/huggingface/optimum-habana#validated-models). Users can try other of the thousands of Hugging Face models on Intel Gaudi accelerators and tasks with only few changes. -## What is a Habana Processing Unit (HPU)? +## What are Intel Gaudi AI Accelerators (HPUs)? HPUs offer fast model training and inference as well as a great price-performance ratio. -Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this article benchmarking Habana Gaudi2 versus Nvidia A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. -If you are not familiar with HPUs and would like to know more about them, we recommend you take a look at [our conceptual guide](https://huggingface.co/docs/optimum/habana/concept_guides/hpu). +Check out [this blog post about BLOOM inference](https://huggingface.co/blog/habana-gaudi-2-bloom) and [this post benchmarking Intel Gaudi 2 and NVIDIA A100 GPUs for BridgeTower training](https://huggingface.co/blog/bridgetower) for concrete examples. ## Install the library and get example scripts @@ -45,13 +50,13 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up To use the example associated with the latest stable release, run: > ``` > git clone https://github.com/huggingface/optimum-habana -> cd optimum-habana && git checkout v1.10.2 +> cd optimum-habana && git checkout v1.11.1 > ``` -> with `v1.10.2` the version number of this release. +> with `v1.11.1` the version number of this release. ### Option 2: Use the latest main branch under development -Optimum Habana is a fast-moving project, and you may want to install it from source and get the latest scripts : +Optimum for Intel Gaudi is a fast-moving project, and you may want to install it from source and get the latest scripts : ```bash pip install git+https://github.com/huggingface/optimum-habana.git @@ -62,7 +67,7 @@ git clone https://github.com/huggingface/optimum-habana To use DeepSpeed on HPUs, you also need to run the following command: >```bash ->pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 +>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.0 >``` To install the requirements for every example: @@ -76,7 +81,7 @@ To install the requirements for every example: ### Quick Start -๐Ÿค— Optimum Habana was designed with one goal in mind: **to make training and inference straightforward for any ๐Ÿค— Transformers and ๐Ÿค— Diffusers user while leveraging the complete power of Gaudi processors**. +Optimum for Intel Gaudi was designed with one goal in mind: **to make training and inference straightforward for Transformers and Diffusers users, while fully leveraging the power of Intel Gaudi AI Accelerators**. #### Transformers Interface @@ -84,7 +89,7 @@ There are two main classes one needs to know: - [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer): the trainer class that takes care of compiling and distributing the model to run on HPUs, and performing training and evaluation. 
- [GaudiConfig](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config): the class that enables to configure Habana Mixed Precision and to decide whether optimized operators and optimizers should be used or not. -The [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer) is very similar to the [๐Ÿค— Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer), and adapting a script using the Trainer to make it work with Gaudi will mostly consist in simply swapping the `Trainer` class for the `GaudiTrainer` one. +The [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/trainer) is very similar to the [Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer), and adapting a script using the Trainer to make it work with Intel Gaudi accelerators will mostly consist in simply swapping the `Trainer` class for the `GaudiTrainer` one. That's how most of the [example scripts](https://github.com/huggingface/optimum-habana/tree/main/examples) were adapted from their [original counterparts](https://github.com/huggingface/transformers/tree/main/examples/pytorch). Here is an example: @@ -115,12 +120,12 @@ Here is an example: ) ``` -where `gaudi_config_name` is the name of a model from the [Hub](https://huggingface.co/Habana) (Gaudi configurations are stored in model repositories) or a path to a local Gaudi configuration file (you can see [here](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config) how to write your own). +where `gaudi_config_name` is the name of a model from the [Hub](https://huggingface.co/Habana) (Intel Gaudi configurations are stored in model repositories) or a path to a local Intel Gaudi configuration file (you can see [here](https://huggingface.co/docs/optimum/habana/package_reference/gaudi_config) how to write your own). #### Diffusers Interface -You can generate images from prompts using Stable Diffusion on Gaudi using the [`GaudiStableDiffusionPipeline`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline) class and the [`GaudiDDIMScheduler`] which have been both optimized for HPUs. Here is how to use them and the differences with the ๐Ÿค— Diffusers library: +You can generate images from prompts using Stable Diffusion on Intel Gaudi using the [`GaudiStableDiffusionPipeline`](https://huggingface.co/docs/optimum/habana/package_reference/stable_diffusion_pipeline) class and the [`GaudiDDIMScheduler`] which have been both optimized for HPUs. Here is how to use them and the differences with the Diffusers library: ```diff - from diffusers import DDIMScheduler, StableDiffusionPipeline @@ -151,12 +156,12 @@ outputs = generator( ### Documentation -Check out [the documentation of Optimum Habana](https://huggingface.co/docs/optimum/habana/index) for more advanced usage. +Check out [the documentation of Optimum for Intel Gaudi](https://huggingface.co/docs/optimum/habana/index) for more advanced usage. ## Validated Models -The following model architectures, tasks and device distributions have been validated for ๐Ÿค— Optimum Habana: +The following model architectures, tasks and device distributions have been validated for Optimum for Intel Gaudi: > In the tables below, :heavy_check_mark: means single-card, multi-card and DeepSpeed have all been validated. @@ -175,20 +180,29 @@ The following model architectures, tasks and device distributions have been vali | GPT-J |
  • DeepSpeed
  • |
  • Single card
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | GPT-NeoX |
  • DeepSpeed
  • |
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | OPT | |
  • DeepSpeed
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Llama 2 / CodeLlama |
  • DeepSpeed
  • LoRA
  • | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Llama 2 / CodeLlama / Llama 3 / Llama Guard | :heavy_check_mark: | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)
  • [text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)
  • | | StableLM | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Falcon |
  • LoRA
  • | :heavy_check_mark: |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Falcon |
  • LoRA
  • | :heavy_check_mark: |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | CodeGen | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | MPT | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Mistral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Phi | :heavy_check_mark: |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Mixtral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Persimmon | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Qwen2 |
  • Single card
  • |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Gemma | :heavy_check_mark: |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | T5 / Flan T5 | :heavy_check_mark: | :heavy_check_mark: |
  • [summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)
  • [translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)
  • | | BART | |
  • Single card
  • |
  • [summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)
  • [translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)
  • | | ViT | :heavy_check_mark: | :heavy_check_mark: |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | | Swin | :heavy_check_mark: | :heavy_check_mark: |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | | Wav2Vec2 | :heavy_check_mark: | :heavy_check_mark: |
  • [audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)
  • [speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)
  • | +| Whisper | :heavy_check_mark: | :heavy_check_mark: |
  • [speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)
  • | +| SpeechT5 | |
  • Single card
  • |
  • [text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)
  • | | CLIP | :heavy_check_mark: | :heavy_check_mark: |
  • [contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)
  • | | BridgeTower | :heavy_check_mark: | :heavy_check_mark: |
  • [contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)
  • | | ESMFold | |
  • Single card
  • |
  • [protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)
  • | +| Blip | |
  • Single card
  • |
  • [visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)
  • [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)
  • | +| OWLViT | |
  • Single card
  • |
  • [zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)
  • | @@ -198,36 +212,49 @@ The following model architectures, tasks and device distributions have been vali | Architecture | Training | Inference | Tasks | |------------------|:--------:|:--------------------:|:------| -| Stable Diffusion | |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +| Stable Diffusion |
  • [textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)
  • [ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)
  • |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +| Stable Diffusion XL |
  • [fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)
  • |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | | LDM3D | |
  • Single card
  • |
  • [text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
  • | +- PyTorch Image Models/TIMM: + +
    + +| Architecture | Training | Inference | Tasks | +|---------------------|:--------:|:---------:|:------| +| FastViT | |
  • Single card
  • |
  • [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
  • | + +
    + - TRL:
    -| Architecture | Training | Inference | Tasks | -|------------------|:--------:|:--------------------:|:------| -| Llama 2 | :heavy_check_mark: | |
  • [DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | -| Llama 2 | :heavy_check_mark: | |
  • [PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Architecture | Training | Inference | Tasks | +|------------------|:--------:|:--------------------:|:-----------------------------------------------------------------------------------------------| +| Llama 2 | :heavy_check_mark: | |
  • [DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Llama 2 | :heavy_check_mark: | |
  • [PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • | +| Stable Diffusion | :heavy_check_mark: | |
  • [DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
  • |
    -Other models and tasks supported by the ๐Ÿค— Transformers and ๐Ÿค— Diffusers library may also work. You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it) for using them with ๐Ÿค— Optimum Habana. Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the ๐Ÿค— Transformers library to make it work with ๐Ÿค— Optimum Habana. +Other models and tasks supported by the Transformers and Diffusers libraries may also work. You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it) for using them with Optimum for Intel Gaudi. In addition, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the Transformers library to make it work with Optimum for Intel Gaudi. If you find any issues while using those, please open an issue or a pull request. +After training your model, feel free to submit it to the Intel [leaderboard](https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard) which is designed to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel Hardwares. Models submitted to the leaderboard will be evaluated on the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness. ## Gaudi Setup -Please refer to Habana Gaudi's official [installation guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html). +Please refer to the Intel Gaudi AI Accelerator official [installation guide](https://docs.habana.ai/en/latest/Installation_Guide/index.html). -> Tests should be run in a Docker container based on Habana Docker images. +> Tests should be run in a Docker container based on Intel Gaudi Docker images. > -> The current version has been validated for SynapseAI 1.14. +> The current version has been validated for SynapseAI 1.15. ## Development -Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions. +Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions. \ No newline at end of file diff --git a/docs/source/index.mdx b/docs/source/index.mdx index ff85c8304..8c23bbb57 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -15,17 +15,17 @@ limitations under the License. --> -# ๐Ÿค— Optimum Habana +# Optimum for Intel Gaudi -๐Ÿค— Optimum Habana is the interface between the ๐Ÿค— Transformers and ๐Ÿค— Diffusers libraries and [Habana's Gaudi processor (HPU)](https://docs.habana.ai/en/latest/index.html). +Optimum for Intel Gaudi is the interface between the Transformers and Diffusers libraries and [Intelยฎ Gaudiยฎ AI Accelerators (HPUs)](https://docs.habana.ai/en/latest/index.html). It provides a set of tools that enable easy model loading, training and inference on single- and multi-HPU settings for various downstream tasks as shown in the table below. HPUs offer fast model training and inference as well as a great price-performance ratio. 
-Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this article benchmarking Habana Gaudi2 versus Nvidia A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. +Check out [this blog post about BERT pre-training](https://huggingface.co/blog/pretraining-bert) and [this post benchmarking Intel Gaudi 2 with NVIDIA A100 GPUs](https://huggingface.co/blog/habana-gaudi-2-benchmark) for concrete examples. If you are not familiar with HPUs, we recommend you take a look at [our conceptual guide](./concept_guides/hpu). -The following model architectures, tasks and device distributions have been validated for ๐Ÿค— Optimum Habana: +The following model architectures, tasks and device distributions have been validated for Optimum for Intel Gaudi: @@ -47,28 +47,42 @@ In the tables below, โœ… means single-card, multi-card and DeepSpeed have all be | GPT-J |
  • DeepSpeed
  • |
  • Single card
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | GPT-NeoX |
  • DeepSpeed
  • |
  • DeepSpeed
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | OPT | |
  • DeepSpeed
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | -| Llama 2 / CodeLlama |
  • DeepSpeed
  • LoRA
  • | โœ… |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Llama 2 / CodeLlama / Llama 3 / Llama Guard | โœ… | โœ… |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)
  • [text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)
  • | | StableLM | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Falcon |
  • LoRA
  • | โœ… |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | CodeGen | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | MPT | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | | Mistral | |
  • Single card
  • |
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
  • | +| Phi | โœ… |
  • Single card
  • |
  • [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
  • [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
 |
+| Mixtral | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Gemma | ✅ | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Qwen2 | <li>Single card</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| Persimmon | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | T5 / Flan T5 | ✅ | ✅ | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
 | BART | | <li>Single card</li> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
 | ViT | ✅ | ✅ | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 | Swin | ✅ | ✅ | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 | Wav2Vec2 | ✅ | ✅ | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
+| Whisper | ✅ | ✅ | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
+| SpeechT5 | | <li>Single card</li> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)</li> |
 | CLIP | ✅ | ✅ | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
 | BridgeTower | ✅ | ✅ | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
 | ESMFold | | <li>Single card</li> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)</li> |
+| Blip | | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| OWLViT | | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
-- Diffusers
+- Diffusers:
-| Architecture | Training | Inference | Tasks |
-|------------------|:--------:|:---------:|:------|
-| Stable Diffusion | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| Stable Diffusion | <li>[textual inversion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#textual-inversion)</li><li>[ControlNet](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#controlnet-training)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+| LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
+- PyTorch Image Models/TIMM:
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 - TRL:
@@ -76,6 +90,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 |------------------|:--------:|:--------------------:|:------|
 | Llama 2 | ✅ | | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
 | Llama 2 | ✅ | | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
+| Stable Diffusion | ✅ | | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
 Other models and tasks supported by the 🤗 Transformers and 🤗 Diffusers library may also work.
@@ -102,4 +117,4 @@ Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/exa
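Every single-card inference entry in these tables follows the same Gaudi pattern used by the example scripts in this series: call `adapt_transformers_to_gaudi()`, load the model, move it to the `hpu` device, optionally wrap it in an HPU graph, and run under bf16 autocast. The sketch below illustrates that pattern; the ViT checkpoint and image URL are illustrative stand-ins rather than values taken from these patches.

```python
# Sketch of the shared single-HPU inference pattern used by the example scripts in this series.
# The ViT checkpoint and image URL below are only illustrations; any single-card model
# from the tables above should follow the same steps.
import habana_frameworks.torch as ht
import requests
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()  # patch Transformers models for Gaudi

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224")
model.to("hpu")
model = ht.hpu.wrap_in_hpu_graph(model)  # record the forward pass once, replay it on later calls

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
    inputs = processor(images=image, return_tensors="pt").to("hpu")
    logits = model(**inputs).logits
    torch.hpu.synchronize()

print(model.config.id2label[int(logits.argmax(-1))])
```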

    Technical descriptions of how the Habana classes and methods of 🤗 Optimum Habana work.

    - + \ No newline at end of file diff --git a/examples/image-classification/README.md b/examples/image-classification/README.md index f7a9cdd18..8d21f38e6 100644 --- a/examples/image-classification/README.md +++ b/examples/image-classification/README.md @@ -16,7 +16,7 @@ limitations under the License. # Image Classification Examples -This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). +This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). This directory also contains a script to demonstrate a single HPU inference for [PyTorch-Image-Models/TIMM](https://huggingface.co/docs/timm/index) ## Single-HPU training @@ -288,3 +288,24 @@ python run_image_classification.py \ --gaudi_config_name Habana/vit \ --dataloader_num_workers 1 \ --bf16 + +## TIMM/FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +### Single-HPU inference + +```bash +python3 run_timm_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt index 87694059f..7b0e43a8d 100644 --- a/examples/image-classification/requirements.txt +++ b/examples/image-classification/requirements.txt @@ -3,3 +3,4 @@ torchvision>=0.6.0 datasets>=2.14.0 evaluate scikit-learn +timm>=0.9.16 \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/run_timm_example.py similarity index 88% rename from examples/image-classification/timm_fastvit/run_example.py rename to examples/image-classification/run_timm_example.py index 2a2e55d05..6d96b0102 100644 --- a/examples/image-classification/timm_fastvit/run_example.py +++ b/examples/image-classification/run_timm_example.py @@ -15,19 +15,18 @@ # Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time import argparse -from torchvision.utils import save_image +import time + +import habana_frameworks.torch as ht +import requests import timm +import torch +from PIL import Image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -65,9 +64,8 @@ adapt_transformers_to_gaudi() - model = timm.create_model(args.model_name_or_path, pretrained=True) - model.to('hpu') + model.to("hpu") model = model.eval() data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) @@ -82,13 +80,13 @@ with torch.no_grad(), autocast: for i in range(args.warmup): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") outputs = model(inputs) torch.hpu.synchronize() total_model_time = 0 for i in range(args.n_iterations): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") model_start_time = time.time() outputs = model(inputs) torch.hpu.synchronize() @@ -97,8 +95,8 @@ if args.print_result: top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) - print("top5_class_indices: " + str(top5_class_indices)) + print("top5_class_indices: " + str(top5_class_indices.to("cpu").numpy())) print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file + print("Total latency (ms): " + str(total_model_time * 1000)) + print("Average latency (ms): " + str(total_model_time * 1000 / args.n_iterations)) diff --git a/examples/image-classification/timm_fastvit/README.md 
b/examples/image-classification/timm_fastvit/README.md deleted file mode 100644 index 65221adaa..000000000 --- a/examples/image-classification/timm_fastvit/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# FastViT Examples - -This directory contains an example script that demonstrates using FastViT with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "timm/fastvit_t8.apple_in1k" \ - --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) - - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) - - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file From 5386b17e3050fcb2c827723ef0ac5a36802adbcd Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 17:53:19 +0000 Subject: [PATCH 07/13] Add an example of object-segmentation (ClipSeg) using graph mode. --- .../object-segementation/ClipSeg/README.md | 32 ++++++ .../ClipSeg/run_example.py | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 examples/object-segementation/ClipSeg/README.md create mode 100644 examples/object-segementation/ClipSeg/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md new file mode 100644 index 000000000..f476a5a4e --- /dev/null +++ b/examples/object-segementation/ClipSeg/README.md @@ -0,0 +1,32 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using ClipSeg with graph mode. + +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "CIDAS/clipseg-rd64-refined" \ + --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ + --prompt "a cat, a remote, a blanket" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py new file mode 100644 index 000000000..d4aa3efae --- /dev/null +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="CIDAS/clipseg-rd64-refined", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="http://images.cocodataset.org/val2017/000000039769.jpg", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--prompt", + default="a cat, a remote, a blanket", + type=str, + help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = AutoProcessor.from_pretrained(args.model_name_or_path) + model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw) + texts = [] + for text in args.prompt.split(','): + texts.append(text) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + logits = outputs.logits + print(logits.shape) + print("Logits: " + str(logits)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 4318a09abd196ffaae887351b2146a58a96617d1 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 14 Mar 2024 18:44:43 +0000 Subject: [PATCH 08/13] Updated readme and added codes for generating segmented images. 
--- examples/object-segementation/ClipSeg/README.md | 2 +- examples/object-segementation/ClipSeg/run_example.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md index f476a5a4e..d8c76272a 100644 --- a/examples/object-segementation/ClipSeg/README.md +++ b/examples/object-segementation/ClipSeg/README.md @@ -21,7 +21,7 @@ This directory contains an example script that demonstrates using ClipSeg with g python3 run_example.py \ --model_name_or_path "CIDAS/clipseg-rd64-refined" \ --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "a cat, a remote, a blanket" \ + --prompt "cat, remote, blanket" \ --warmup 3 \ --n_iterations 20 \ --use_hpu_graphs \ diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py index d4aa3efae..a10b8fa9d 100644 --- a/examples/object-segementation/ClipSeg/run_example.py +++ b/examples/object-segementation/ClipSeg/run_example.py @@ -23,6 +23,7 @@ import habana_frameworks.torch.core as htcore import time import argparse +from torchvision.utils import save_image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -99,9 +100,14 @@ total_model_time = total_model_time + (model_end_time - model_start_time) if args.print_result: - logits = outputs.logits - print(logits.shape) - print("Logits: " + str(logits)) + if (i == 0): # generate/output once only + logits = outputs.logits + for j in range(logits.shape[0]): + threshold = 0.5 + segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) + segmented_image = segmented_image.to(torch.float32) + save_image(segmented_image, 'segmented' + texts[j] + '.png') + print('Segmented images are generated.') print("n_iterations: " + str(args.n_iterations)) print("Total latency (ms): " + str(total_model_time*1000)) From b3c704317f6bb22cf039db58690ffe2433faf867 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 18 Mar 2024 18:08:06 +0000 Subject: [PATCH 09/13] Added an example code of SAM model. --- .../SegmentAnythingModel/README.md | 33 ++++++ .../SegmentAnythingModel/run_example.py | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 examples/object-segementation/SegmentAnythingModel/README.md create mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md new file mode 100644 index 000000000..8a0f24300 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/README.md @@ -0,0 +1,33 @@ + + +# Owl-ViT Examples + +This directory contains an example script that demonstrates using SAM with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "facebook/sam-vit-huge" \ + --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ + --point_prompt "450,600" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) + - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py new file mode 100644 index 000000000..592f7f429 --- /dev/null +++ b/examples/object-segementation/SegmentAnythingModel/run_example.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/facebook/sam-vit-base + +from transformers import SamModel, SamProcessor +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="facebook/sam-vit-huge", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--point_prompt", + default="450, 600", + type=str, + help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to save the segmentation result.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + processor = SamProcessor.from_pretrained(args.model_name_or_path) + model = SamModel.from_pretrained(args.model_name_or_path) + + image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") + points = [] + for text in args.point_prompt.split(','): + points.append(int(text)) + points = [[points]] + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + outputs = model(**inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") + model_start_time = time.time() + outputs = model(**inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + if (i == 0): # generate/output once only + iou = outputs.iou_scores + print("iou score: " + str(iou)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 0693d4178390f4fc516c869c304f32da209467a6 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Thu, 21 Mar 2024 17:51:56 +0000 Subject: [PATCH 10/13] Add an example of TIMM/FastViT. --- .../timm_fastvit/README.md | 33 ++++++ .../timm_fastvit/run_example.py | 104 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 examples/image-classification/timm_fastvit/README.md create mode 100644 examples/image-classification/timm_fastvit/run_example.py diff --git a/examples/image-classification/timm_fastvit/README.md b/examples/image-classification/timm_fastvit/README.md new file mode 100644 index 000000000..65221adaa --- /dev/null +++ b/examples/image-classification/timm_fastvit/README.md @@ -0,0 +1,33 @@ + + +# FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +## Single-HPU inference + +```bash +python3 run_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/timm_fastvit/run_example.py new file mode 100644 index 000000000..2a2e55d05 --- /dev/null +++ b/examples/image-classification/timm_fastvit/run_example.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k + +from transformers import AutoProcessor, CLIPSegForImageSegmentation +from PIL import Image +import requests +import torch +import habana_frameworks.torch as ht +import habana_frameworks.torch.core as htcore +import time +import argparse +from torchvision.utils import save_image +import timm + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default="timm/fastvit_t8.apple_in1k", + type=str, + help="Path of the pre-trained model", + ) + parser.add_argument( + "--image_path", + default="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png", + type=str, + help='Path of the input image. Should be a single string (eg: --image_path "URL")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to use bf16 precision for classification.", + ) + parser.add_argument( + "--print_result", + action="store_true", + help="Whether to print the classification results.", + ) + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + + args = parser.parse_args() + + adapt_transformers_to_gaudi() + + + model = timm.create_model(args.model_name_or_path, pretrained=True) + model.to('hpu') + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + transforms = timm.data.create_transform(**data_config, is_training=False) + + img = Image.open(requests.get(args.image_path, stream=True).raw) + + if args.use_hpu_graphs: + model = ht.hpu.wrap_in_hpu_graph(model) + + autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) + model.to("hpu") + + with torch.no_grad(), autocast: + for i in range(args.warmup): + inputs = transforms(img).unsqueeze(0).to('hpu') + outputs = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(args.n_iterations): + inputs = transforms(img).unsqueeze(0).to('hpu') + model_start_time = time.time() + outputs = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + if args.print_result: + top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) + print("top5_class_indices: " + str(top5_class_indices)) + + print("n_iterations: " + str(args.n_iterations)) + print("Total latency (ms): " + str(total_model_time*1000)) + print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From 4c496a322b9ae6535e641353ee2215071a9b8d57 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Mon, 25 Mar 2024 21:00:47 +0000 Subject: [PATCH 11/13] Removed unrelated files. --- .../object-segementation/ClipSeg/README.md | 32 ----- .../ClipSeg/run_example.py | 114 ------------------ .../SegmentAnythingModel/README.md | 33 ----- .../SegmentAnythingModel/run_example.py | 109 ----------------- 4 files changed, 288 deletions(-) delete mode 100644 examples/object-segementation/ClipSeg/README.md delete mode 100644 examples/object-segementation/ClipSeg/run_example.py delete mode 100644 examples/object-segementation/SegmentAnythingModel/README.md delete mode 100644 examples/object-segementation/SegmentAnythingModel/run_example.py diff --git a/examples/object-segementation/ClipSeg/README.md b/examples/object-segementation/ClipSeg/README.md deleted file mode 100644 index d8c76272a..000000000 --- a/examples/object-segementation/ClipSeg/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using ClipSeg with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "CIDAS/clipseg-rd64-refined" \ - --image_path "http://images.cocodataset.org/val2017/000000039769.jpg" \ - --prompt "cat, remote, blanket" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [clipseg-rd64-refined ](https://huggingface.co/CIDAS/clipseg-rd64-refined) \ No newline at end of file diff --git a/examples/object-segementation/ClipSeg/run_example.py b/examples/object-segementation/ClipSeg/run_example.py deleted file mode 100644 index a10b8fa9d..000000000 --- a/examples/object-segementation/ClipSeg/run_example.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/docs/transformers/main/en/model_doc/clipseg - -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse -from torchvision.utils import save_image - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="CIDAS/clipseg-rd64-refined", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="http://images.cocodataset.org/val2017/000000039769.jpg", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--prompt", - default="a cat, a remote, a blanket", - type=str, - help='Prompt for classification. It should be a string seperated by comma. (eg: --prompt "a photo of a cat, a photo of a dog")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to print the classification results.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = AutoProcessor.from_pretrained(args.model_name_or_path) - model = CLIPSegForImageSegmentation.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw) - texts = [] - for text in args.prompt.split(','): - texts.append(text) - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(text=texts, images=[image]* len(texts), padding=True, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - logits = outputs.logits - for j in range(logits.shape[0]): - threshold = 0.5 - segmented_image = ((torch.sigmoid(logits[j]) > threshold)*255).unsqueeze(0) - segmented_image = segmented_image.to(torch.float32) - save_image(segmented_image, 'segmented' + texts[j] + '.png') - print('Segmented images are generated.') - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/README.md b/examples/object-segementation/SegmentAnythingModel/README.md deleted file mode 100644 index 8a0f24300..000000000 --- a/examples/object-segementation/SegmentAnythingModel/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# Owl-ViT Examples - -This directory contains an example script that demonstrates using SAM with graph mode. 
- -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "facebook/sam-vit-huge" \ - --image_path "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" \ - --point_prompt "450,600" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) - - [facebook/sam-vit-huge](https://huggingface.co/facebook/sam-vit-huge) \ No newline at end of file diff --git a/examples/object-segementation/SegmentAnythingModel/run_example.py b/examples/object-segementation/SegmentAnythingModel/run_example.py deleted file mode 100644 index 592f7f429..000000000 --- a/examples/object-segementation/SegmentAnythingModel/run_example.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and - -# Copied from https://huggingface.co/facebook/sam-vit-base - -from transformers import SamModel, SamProcessor -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time -import argparse - -from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--model_name_or_path", - default="facebook/sam-vit-huge", - type=str, - help="Path of the pre-trained model", - ) - parser.add_argument( - "--image_path", - default="https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png", - type=str, - help='Path of the input image. Should be a single string (eg: --image_path "URL")', - ) - parser.add_argument( - "--point_prompt", - default="450, 600", - type=str, - help='Prompt for segmentation. It should be a string seperated by comma. (eg: --point_prompt "450, 600")', - ) - parser.add_argument( - "--use_hpu_graphs", - action="store_true", - help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", - ) - parser.add_argument( - "--bf16", - action="store_true", - help="Whether to use bf16 precision for classification.", - ) - parser.add_argument( - "--print_result", - action="store_true", - help="Whether to save the segmentation result.", - ) - parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") - parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") - - args = parser.parse_args() - - adapt_transformers_to_gaudi() - - processor = SamProcessor.from_pretrained(args.model_name_or_path) - model = SamModel.from_pretrained(args.model_name_or_path) - - image = Image.open(requests.get(args.image_path, stream=True).raw).convert("RGB") - points = [] - for text in args.point_prompt.split(','): - points.append(int(text)) - points = [[points]] - - if args.use_hpu_graphs: - model = ht.hpu.wrap_in_hpu_graph(model) - - autocast = torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=args.bf16) - model.to("hpu") - - with torch.no_grad(), autocast: - for i in range(args.warmup): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - outputs = model(**inputs) - torch.hpu.synchronize() - - total_model_time = 0 - for i in range(args.n_iterations): - inputs = processor(image, input_points=points, return_tensors="pt").to("hpu") - model_start_time = time.time() - outputs = model(**inputs) - torch.hpu.synchronize() - model_end_time = time.time() - total_model_time = total_model_time + (model_end_time - model_start_time) - - if args.print_result: - if (i == 0): # generate/output once only - iou = outputs.iou_scores - print("iou score: " + str(iou)) - - print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file From bda2df2cbb188a81cdd21904d1cb7a788c9ab422 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Wed, 15 May 2024 17:34:19 +0000 Subject: [PATCH 12/13] Added Readme, readme.idx etc. Aligned the style. --- README.md | 12 ++++++- docs/source/index.mdx | 8 +++-- examples/image-classification/README.md | 23 ++++++++++++- .../image-classification/requirements.txt | 1 + .../run_example.py => run_timm_example.py} | 28 ++++++++-------- .../timm_fastvit/README.md | 33 ------------------- 6 files changed, 53 insertions(+), 52 deletions(-) rename examples/image-classification/{timm_fastvit/run_example.py => run_timm_example.py} (88%) delete mode 100644 examples/image-classification/timm_fastvit/README.md diff --git a/README.md b/README.md index f84db6484..f3da039fe 100644 --- a/README.md +++ b/README.md @@ -218,6 +218,16 @@ The following model architectures, tasks and device distributions have been vali +- PyTorch Image Models/TIMM: + +
+
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
+
+
+
 - TRL:
@@ -247,4 +257,4 @@ Please refer to the Intel Gaudi AI Accelerator official [installation guide](htt
 
 ## Development
 
-Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
+Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
\ No newline at end of file
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index 63ec3279c..8c23bbb57 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -71,7 +71,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | Blip | | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | OWLViT | | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 
-- Diffusers
+- Diffusers:
 
 | Architecture | Training | Inference | Tasks |
 |---------------------|:--------:|:---------:|:------|
@@ -79,6 +79,10 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | Stable Diffusion XL | <li>[fine-tuning](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training#fine-tuning-for-stable-diffusion-xl)</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 | LDM3D | | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
 
+- PyTorch Image Models/TIMM:
+| Architecture | Training | Inference | Tasks |
+|---------------------|:--------:|:---------:|:------|
+| FastViT | | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
 
 - TRL:
@@ -113,4 +117,4 @@ Besides, [this page](https://github.com/huggingface/optimum-habana/tree/main/exa

    Technical descriptions of how the Habana classes and methods of 🤗 Optimum Habana work.

    - + \ No newline at end of file diff --git a/examples/image-classification/README.md b/examples/image-classification/README.md index f7a9cdd18..8d21f38e6 100644 --- a/examples/image-classification/README.md +++ b/examples/image-classification/README.md @@ -16,7 +16,7 @@ limitations under the License. # Image Classification Examples -This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). +This directory contains a script that showcases how to fine-tune any model supported by the [`AutoModelForImageClassification` API](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) (such as [ViT](https://huggingface.co/docs/transformers/main/en/model_doc/vit) or [Swin Transformer](https://huggingface.co/docs/transformers/main/en/model_doc/swin)) on HPUs. They can be used to fine-tune models on both [datasets from the hub](#using-datasets-from-hub) as well as on [your own custom data](#using-your-own-data). This directory also contains a script to demonstrate a single HPU inference for [PyTorch-Image-Models/TIMM](https://huggingface.co/docs/timm/index) ## Single-HPU training @@ -288,3 +288,24 @@ python run_image_classification.py \ --gaudi_config_name Habana/vit \ --dataloader_num_workers 1 \ --bf16 + +## TIMM/FastViT Examples + +This directory contains an example script that demonstrates using FastViT with graph mode. 
+ +### Single-HPU inference + +```bash +python3 run_timm_example.py \ + --model_name_or_path "timm/fastvit_t8.apple_in1k" \ + --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ + --warmup 3 \ + --n_iterations 20 \ + --use_hpu_graphs \ + --bf16 \ + --print_result +``` +Models that have been validated: + - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) + - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) + - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt index 87694059f..7b0e43a8d 100644 --- a/examples/image-classification/requirements.txt +++ b/examples/image-classification/requirements.txt @@ -3,3 +3,4 @@ torchvision>=0.6.0 datasets>=2.14.0 evaluate scikit-learn +timm>=0.9.16 \ No newline at end of file diff --git a/examples/image-classification/timm_fastvit/run_example.py b/examples/image-classification/run_timm_example.py similarity index 88% rename from examples/image-classification/timm_fastvit/run_example.py rename to examples/image-classification/run_timm_example.py index 2a2e55d05..6d96b0102 100644 --- a/examples/image-classification/timm_fastvit/run_example.py +++ b/examples/image-classification/run_timm_example.py @@ -15,19 +15,18 @@ # Copied from https://huggingface.co/timm/fastvit_t8.apple_in1k -from transformers import AutoProcessor, CLIPSegForImageSegmentation -from PIL import Image -import requests -import torch -import habana_frameworks.torch as ht -import habana_frameworks.torch.core as htcore -import time import argparse -from torchvision.utils import save_image +import time + +import habana_frameworks.torch as ht +import requests import timm +import torch +from PIL import Image from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -65,9 +64,8 @@ adapt_transformers_to_gaudi() - model = timm.create_model(args.model_name_or_path, pretrained=True) - model.to('hpu') + model.to("hpu") model = model.eval() data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) @@ -82,13 +80,13 @@ with torch.no_grad(), autocast: for i in range(args.warmup): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") outputs = model(inputs) torch.hpu.synchronize() total_model_time = 0 for i in range(args.n_iterations): - inputs = transforms(img).unsqueeze(0).to('hpu') + inputs = transforms(img).unsqueeze(0).to("hpu") model_start_time = time.time() outputs = model(inputs) torch.hpu.synchronize() @@ -97,8 +95,8 @@ if args.print_result: top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5) - print("top5_class_indices: " + str(top5_class_indices)) + print("top5_class_indices: " + str(top5_class_indices.to("cpu").numpy())) print("n_iterations: " + str(args.n_iterations)) - print("Total latency (ms): " + str(total_model_time*1000)) - print("Average latency (ms): " + str(total_model_time*1000/args.n_iterations)) \ No newline at end of file + print("Total latency (ms): " + str(total_model_time * 1000)) + print("Average latency (ms): " + str(total_model_time * 1000 / args.n_iterations)) diff --git a/examples/image-classification/timm_fastvit/README.md 
b/examples/image-classification/timm_fastvit/README.md deleted file mode 100644 index 65221adaa..000000000 --- a/examples/image-classification/timm_fastvit/README.md +++ /dev/null @@ -1,33 +0,0 @@ - - -# FastViT Examples - -This directory contains an example script that demonstrates using FastViT with graph mode. - -## Single-HPU inference - -```bash -python3 run_example.py \ - --model_name_or_path "timm/fastvit_t8.apple_in1k" \ - --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" \ - --warmup 3 \ - --n_iterations 20 \ - --use_hpu_graphs \ - --bf16 \ - --print_result -``` -Models that have been validated: - - [timm/fastvit_t8.apple_dist_in1k](https://huggingface.co/timm/fastvit_t8.apple_dist_in1k) - - [timm/fastvit_t8.apple_in1k](https://huggingface.co/timm/fastvit_t8.apple_in1k) - - [timm/fastvit_sa12.apple_in1k](https://huggingface.co/timm/fastvit_sa12.apple_in1k) \ No newline at end of file From 3b76c1b2abdeea78e64c00ccbee7de9ac7a8ebc7 Mon Sep 17 00:00:00 2001 From: Raymond Lau Date: Tue, 25 Jun 2024 17:31:42 +0000 Subject: [PATCH 13/13] Rebase to the latest main branch and added test cases. --- Makefile | 5 ++ tests/test_image_classification.py | 120 +++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 tests/test_image_classification.py diff --git a/Makefile b/Makefile index 6e87a399a..a43f2a55a 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,11 @@ fast_tests_diffusers: python -m pip install .[tests] python -m pytest tests/test_diffusers.py +# Run single-card non-regression tests on image classification models +fast_tests_image_classifications: + pip install timm + python -m pytest tests/test_image_classification.py + # Run single-card non-regression tests slow_tests_1x: test_installs python -m pytest tests/test_examples.py -v -s -k "single_card" diff --git a/tests/test_image_classification.py b/tests/test_image_classification.py new file mode 100644 index 000000000..6e59b7ac4 --- /dev/null +++ b/tests/test_image_classification.py @@ -0,0 +1,120 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +from unittest import TestCase + +import habana_frameworks.torch as ht +import numpy as np +import requests +import timm +import torch +from PIL import Image + +from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + + +adapt_transformers_to_gaudi() + +# For Gaudi 2 +LATENCY_FastViT_BF16_GRAPH_BASELINE = 2.5270626640319824 + + +class GaudiFastViTTester(TestCase): + """ + Tests for FastViT model + """ + + def prepare_model_and_processor(self): + model = timm.create_model("timm/fastvit_t8.apple_in1k", pretrained=True) + model.to("hpu") + model = model.eval() + data_config = timm.data.resolve_model_data_config(model) + processor = timm.data.create_transform(**data_config, is_training=False) + return model, processor + + def prepare_data(self): + url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" + image = Image.open(requests.get(url, stream=True).raw) + return image + + def test_inference_default(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_inference_autocast(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + + with torch.autocast(device_type="hpu", dtype=torch.bfloat16): # Autocast BF16 + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_inference_hpu_graphs(self): + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + inputs = processor(image).unsqueeze(0).to("hpu") + + model = ht.hpu.wrap_in_hpu_graph(model) # Apply graph + + outputs = model(inputs) + top1_probabilities, top1_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=1) + top1_probabilities = top1_probabilities.to("cpu").detach().numpy() + top1_class_indices = top1_class_indices.to("cpu").numpy() + expected_scores = np.array([21.406523]) # from CPU + expected_class = np.array([960]) + self.assertEqual(top1_class_indices, expected_class) + self.assertLess(np.abs(top1_probabilities - expected_scores).max(), 1) + + def test_no_latency_regression_autocast(self): + warmup = 3 + iterations = 20 + + model, processor = self.prepare_model_and_processor() + image = self.prepare_data() + + model = ht.hpu.wrap_in_hpu_graph(model) + + with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True): + for i in range(warmup): + inputs = processor(image).unsqueeze(0).to("hpu") + _ = model(inputs) + torch.hpu.synchronize() + + total_model_time = 0 + for i in range(iterations): + inputs = 
processor(image).unsqueeze(0).to("hpu") + model_start_time = time.time() + _ = model(inputs) + torch.hpu.synchronize() + model_end_time = time.time() + total_model_time = total_model_time + (model_end_time - model_start_time) + + latency = total_model_time * 1000 / iterations # in terms of ms + self.assertLessEqual(latency, 1.05 * LATENCY_FastViT_BF16_GRAPH_BASELINE)
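
Taken together, the FastViT additions in this series reduce to the following single-HPU inference flow. This is a condensed sketch, assuming a Gaudi host with `timm`, `habana_frameworks`, and `optimum-habana` installed; the full script is `run_timm_example.py` above.

```python
# Condensed sketch of the single-HPU TIMM/FastViT inference flow from run_timm_example.py.
# Assumes a Gaudi host with timm, habana_frameworks and optimum-habana installed.
import habana_frameworks.torch as ht
import requests
import timm
import torch
from PIL import Image

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

adapt_transformers_to_gaudi()

# Pretrained FastViT and its matching preprocessing transform.
model = timm.create_model("timm/fastvit_t8.apple_in1k", pretrained=True).to("hpu").eval()
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

# HPU graphs record the forward pass once and replay it on later calls.
model = ht.hpu.wrap_in_hpu_graph(model)

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
image = Image.open(requests.get(url, stream=True).raw)

with torch.no_grad(), torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=True):
    inputs = transforms(image).unsqueeze(0).to("hpu")
    outputs = model(inputs)
    torch.hpu.synchronize()

top5_probabilities, top5_class_indices = torch.topk(outputs.softmax(dim=1) * 100, k=5)
print("top5_class_indices:", top5_class_indices.to("cpu").numpy())
```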