From b8bcde07407d81efc1fc14f1424dee2bb686259c Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Fri, 12 Apr 2024 01:30:22 +0000
Subject: [PATCH 01/11] Add the MC example

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md          | 14 +++
 examples/stable-diffusion/run_distributed.py | 91 ++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 examples/stable-diffusion/run_distributed.py

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index accb8737f..144161ae5 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -276,3 +276,17 @@ python text_to_image_generation.py \
     --use_hpu_graphs \
     --gaudi_config Habana/stable-diffusion-2
 ```
+
+### Distributed inference with multiple HPUs
+
+Here is how to generate two images with two prompts on two HPUs:
+```python
+python ../gaudi_spawn.py \
+    --world_size 2 run_distributed.py \
+    --model_name_or_path runwayml/stable-diffusion-v1-5 \
+    --prompts "a cat" "a dog" \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion \
+    --bf16
+```
\ No newline at end of file
diff --git a/examples/stable-diffusion/run_distributed.py b/examples/stable-diffusion/run_distributed.py
new file mode 100644
index 000000000..56cb53237
--- /dev/null
+++ b/examples/stable-diffusion/run_distributed.py
@@ -0,0 +1,91 @@
+# coding=utf-8
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Adapted from: https://huggingface.co/docs/diffusers/en/training/distributed_inference
+ - Use the GaudiStableDiffusionPipeline
+"""
+import torch
+import logging
+import argparse
+from accelerate import PartialState
+from optimum.habana.diffusers import GaudiStableDiffusionPipeline
+from optimum.habana.utils import set_seed
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--model_name_or_path",
+        default="runwayml/stable-diffusion-v1-5",
+        type=str,
+        help="Path to pre-trained model",
+    )
+    # Pipeline arguments
+    parser.add_argument(
+        "--prompts",
+        type=str,
+        nargs="*",
+        default=["a dog", "a cat"],
+        help="The prompt or prompts to guide the image generation.",
+    )
+    parser.add_argument(
+        "--num_images_per_prompt", type=int, default=1, help="The number of images to generate per prompt."
+    )
+    parser.add_argument("--seed", type=int, default=None, help="Random seed for initialization.")
+    parser.add_argument("--bf16", action="store_true", help="Whether to perform generation in bf16 precision.")
+    parser.add_argument(
+        "--gaudi_config",
+        type=str,
+        default="Habana/stable-diffusion",
+        help=(
+            "Name or path of the Gaudi configuration. In particular, it enables to specify how to apply Habana Mixed"
+            " Precision."
+        ),
+    )
+    # HPU-specific arguments
+    parser.add_argument("--use_habana", action="store_true", help="Use HPU.")
+    parser.add_argument(
+        "--use_hpu_graphs", action="store_true", help="Use HPU graphs on HPU. This should lead to faster generations."
+    )
+    args = parser.parse_args()
+    # Set seed before running the model
+    if args.seed:
+        logger.info("Set the random seed {}!".format(args.seed))
+        set_seed(args.seed)
+
+    kwargs = {
+        "use_habana": args.use_habana,
+        "use_hpu_graphs": args.use_hpu_graphs,
+        "gaudi_config": args.gaudi_config,
+        "torch_dtype": torch.bfloat16 if args.bf16 else None
+    }
+    print(f"kwargs={kwargs}")
+    pipeline = GaudiStableDiffusionPipeline.from_pretrained(
+        args.model_name_or_path, use_safetensors=True, **kwargs
+    )
+    distributed_state = PartialState()
+    kwargs = {
+        "num_images_per_prompt": args.num_images_per_prompt
+    }
+    with distributed_state.split_between_processes(args.prompts) as prompt:
+        outputs = pipeline(prompt, **kwargs)
+        for i, image in enumerate(outputs.images):
+            image.save(f"result_{distributed_state.process_index}_{i}.png")
+
+if __name__ == "__main__":
+    main()

From f4bd44048e42a08e6a31b87a805f1fe7350de23d Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Tue, 7 May 2024 03:42:17 +0000
Subject: [PATCH 02/11] Merge the run_distributed.py into
 text_to_image_generation.py

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md           | 73 ++++++++++++---
 .../text_to_image_generation.py               | 91 ++++++++++---------
 2 files changed, 105 insertions(+), 59 deletions(-)

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index 144161ae5..b30ab4133 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -60,11 +60,28 @@ python text_to_image_generation.py \
     --bf16
 ```
 
+### Distributed inference with multiple HPUs
+Here is how to generate images with two prompts on two HPUs:
+```python
+python ../gaudi_spawn.py \
+    --world_size 2 text_to_image_generation.py \
+    --model_name_or_path runwayml/stable-diffusion-v1-5 \
+    --prompts "An image of a squirrel in Picasso style" "A shiny flying horse taking off" \
+    --num_images_per_prompt 20 \
+    --batch_size 8 \
+    --image_save_dir /tmp/stable_diffusion_images \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion \
+    --bf16 \
+    --seed 92 \
+    --distributed
+```
+
 > HPU graphs are recommended when generating images by batches to get the fastest possible generations.
 > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster.
 > You can enable this mode with `--use_hpu_graphs`.
-
 ### Stable Diffusion 2
 
 [Stable Diffusion 2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion_2) can also be used to generate images with this script. Here is an example for a single prompt:
@@ -173,6 +190,27 @@ python text_to_image_generation.py \
     --bf16
 ```
 
+Here is how to distributed generate SDXL images with two prompts on two HPUs:
+```python
+python ../gaudi_spawn.py \
+    --world_size 2 text_to_image_generation.py \
+    --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \
+    --prompts "Sailing ship painting by Van Gogh" "A shiny flying horse taking off" \
+    --prompts_2 "Red tone" "Blue tone" \
+    --negative_prompts "Low quality" "Sketch" \
+    --negative_prompts_2 "Clouds" "Clouds" \
+    --num_images_per_prompt 20 \
+    --batch_size 8 \
+    --image_save_dir /tmp/stable_diffusion_xl_images \
+    --scheduler euler_discrete \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion \
+    --bf16 \
+    --distributed
+```
+
+
 > HPU graphs are recommended when generating images by batches to get the fastest possible generations.
 > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster.
 > You can enable this mode with `--use_hpu_graphs`.
 
@@ -241,6 +279,25 @@ python text_to_image_generation.py \
     --bf16
 ```
 
+Here is how to generate images conditioned by canny edge model with two prompts on two HPUs:
+```bash
+pip install -r requirements.txt
+python ../gaudi_spawn.py \
+    --world_size 2 text_to_image_generation.py \
+    --model_name_or_path runwayml/stable-diffusion-v1-5 \
+    --controlnet_model_name_or_path lllyasviel/sd-controlnet-canny \
+    --prompts "futuristic-looking woman" "a rusty robot" \
+    --control_image https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png \
+    --num_images_per_prompt 10 \
+    --batch_size 4 \
+    --image_save_dir /tmp/controlnet_images \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion \
+    --bf16 \
+    --distributed
+```
+
 Here is how to generate images conditioned by open pose model:
 ```bash
 pip install -r requirements.txt
@@ -275,18 +332,4 @@ python text_to_image_generation.py \
     --use_habana \
     --use_hpu_graphs \
     --gaudi_config Habana/stable-diffusion-2
-```
-
-### Distributed inference with multiple HPUs
-
-Here is how to generate two images with two prompts on two HPUs:
-```python
-python ../gaudi_spawn.py \
-    --world_size 2 run_distributed.py \
-    --model_name_or_path runwayml/stable-diffusion-v1-5 \
-    --prompts "a cat" "a dog" \
-    --use_habana \
-    --use_hpu_graphs \
-    --gaudi_config Habana/stable-diffusion \
-    --bf16
+```
\ No newline at end of file
diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index 1f005c735..abdd95497 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -20,6 +20,7 @@
 import numpy as np
 import torch
+from accelerate import PartialState
 
 from optimum.habana.diffusers import (
     GaudiDDIMScheduler,
@@ -214,6 +215,8 @@
         type=int,
         help="Number of steps to capture for profiling.",
     )
+
+    parser.add_argument("--distributed", action="store_true", help="Use distributed inference on multi-cards")
     args = parser.parse_args()
 
     # Set image resolution
@@ -293,6 +296,25 @@
     if args.bf16:
         kwargs["torch_dtype"] = torch.bfloat16
 
+    negative_prompts = args.negative_prompts
+    if args.distributed:
+        distributed_state = PartialState()
+        if args.negative_prompts is not None:
+            with distributed_state.split_between_processes(args.negative_prompts) as negative_prompt:
+                negative_prompts = negative_prompt
+
+    infer_kwargs = {
+        "num_images_per_prompt": args.num_images_per_prompt,
+        "batch_size": args.batch_size,
+        "num_inference_steps": args.num_inference_steps,
+        "guidance_scale": args.guidance_scale,
+        "negative_prompt": negative_prompts,
+        "eta": args.eta,
+        "output_type": args.output_type,
+        "profiling_warmup_steps": args.profiling_warmup_steps,
+        "profiling_steps": args.profiling_steps
+    }
+
     # Generate images
     if args.control_image is not None:
         model_dtype = torch.bfloat16 if args.bf16 else None
@@ -305,21 +327,8 @@
         # Set seed before running the model
         set_seed(args.seed)
+        infer_kwargs["image"] = control_image
 
-        outputs = pipeline(
-            prompt=args.prompts,
-            image=control_image,
-            num_images_per_prompt=args.num_images_per_prompt,
-            batch_size=args.batch_size,
-            num_inference_steps=args.num_inference_steps,
-            guidance_scale=args.guidance_scale,
-            negative_prompt=args.negative_prompts,
-            eta=args.eta,
-            output_type=args.output_type,
-            profiling_warmup_steps=args.profiling_warmup_steps,
-            profiling_steps=args.profiling_steps,
-            **res,
-        )
     elif sdxl:
         pipeline = GaudiStableDiffusionXLPipeline.from_pretrained(
             args.model_name_or_path,
@@ -329,21 +338,18 @@
         # Set seed before running the model
         set_seed(args.seed)
-        outputs = pipeline(
-            prompt=args.prompts,
-            prompt_2=args.prompts_2,
-            num_images_per_prompt=args.num_images_per_prompt,
-            batch_size=args.batch_size,
-            num_inference_steps=args.num_inference_steps,
-            guidance_scale=args.guidance_scale,
-            negative_prompt=args.negative_prompts,
-            negative_prompt_2=args.negative_prompts_2,
-            eta=args.eta,
-            output_type=args.output_type,
-            profiling_warmup_steps=args.profiling_warmup_steps,
-            profiling_steps=args.profiling_steps,
-            **res,
-        )
+        prompts_2 = args.prompts_2
+        negative_prompts_2 = args.negative_prompts_2
+        if args.distributed and args.prompts_2 is not None:
+            with distributed_state.split_between_processes(args.prompts_2) as prompt_2:
+                prompts_2 = prompt_2
+        if args.distributed and args.negative_prompts_2 is not None:
+            with distributed_state.split_between_processes(args.negative_prompts_2) as negative_prompt_2:
+                negative_prompts_2 = negative_prompt_2
+
+        infer_kwargs["prompt_2"] = prompts_2
+        infer_kwargs["negative_prompt_2"] = negative_prompts_2
+
     else:
         pipeline = GaudiStableDiffusionPipeline.from_pretrained(
             args.model_name_or_path,
@@ -352,29 +358,26 @@
         # Set seed before running the model
         set_seed(args.seed)
-
-        outputs = pipeline(
-            prompt=args.prompts,
-            num_images_per_prompt=args.num_images_per_prompt,
-            batch_size=args.batch_size,
-            num_inference_steps=args.num_inference_steps,
-            guidance_scale=args.guidance_scale,
-            negative_prompt=args.negative_prompts,
-            eta=args.eta,
-            output_type=args.output_type,
-            profiling_warmup_steps=args.profiling_warmup_steps,
-            profiling_steps=args.profiling_steps,
-            **res,
-        )
+        if args.distributed:
+            with distributed_state.split_between_processes(args.prompts) as prompt:
+                outputs = pipeline(prompt=prompt, **infer_kwargs, **res)
+        else:
+            outputs = pipeline(prompt=args.prompts, **infer_kwargs, **res)
 
     # Save the pipeline in the specified directory if not None
     if args.pipeline_save_dir is not None:
-        pipeline.save_pretrained(args.pipeline_save_dir)
+        save_dir = args.pipeline_save_dir
+        if args.distributed:
+            save_dir = f"{args.pipeline_save_dir}_{distributed_state.process_index}"
+        pipeline.save_pretrained(save_dir)
 
     # Save images in the specified directory if not None and if they are in PIL format
     if args.image_save_dir is not None:
         if args.output_type == "pil":
             image_save_dir = Path(args.image_save_dir)
+            if args.distributed:
+                image_save_dir = Path(f"{image_save_dir}_{distributed_state.process_index}")
+
             image_save_dir.mkdir(parents=True, exist_ok=True)
             logger.info(f"Saving images in {image_save_dir.resolve()}...")
             if args.ldm3d:

From d941c871b4b6508628c614069177bf5a89bc6bd7 Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Tue, 7 May 2024 03:44:58 +0000
Subject: [PATCH 03/11] Remove the run_distributed.py

Signed-off-by: yuanwu
---
 examples/stable-diffusion/run_distributed.py | 91 --------------------
 1 file changed, 91 deletions(-)
 delete mode 100644 examples/stable-diffusion/run_distributed.py

diff --git a/examples/stable-diffusion/run_distributed.py b/examples/stable-diffusion/run_distributed.py
deleted file mode 100644
index 56cb53237..000000000
--- a/examples/stable-diffusion/run_distributed.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Adapted from: https://huggingface.co/docs/diffusers/en/training/distributed_inference
- - Use the GaudiStableDiffusionPipeline
-"""
-import torch
-import logging
-import argparse
-from accelerate import PartialState
-from optimum.habana.diffusers import GaudiStableDiffusionPipeline
-from optimum.habana.utils import set_seed
-
-logger = logging.getLogger(__name__)
-
-
-def main():
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--model_name_or_path",
-        default="runwayml/stable-diffusion-v1-5",
-        type=str,
-        help="Path to pre-trained model",
-    )
-    # Pipeline arguments
-    parser.add_argument(
-        "--prompts",
-        type=str,
-        nargs="*",
-        default=["a dog", "a cat"],
-        help="The prompt or prompts to guide the image generation.",
-    )
-    parser.add_argument(
-        "--num_images_per_prompt", type=int, default=1, help="The number of images to generate per prompt."
-    )
-    parser.add_argument("--seed", type=int, default=None, help="Random seed for initialization.")
-    parser.add_argument("--bf16", action="store_true", help="Whether to perform generation in bf16 precision.")
-    parser.add_argument(
-        "--gaudi_config",
-        type=str,
-        default="Habana/stable-diffusion",
-        help=(
-            "Name or path of the Gaudi configuration. In particular, it enables to specify how to apply Habana Mixed"
-            " Precision."
-        ),
-    )
-    # HPU-specific arguments
-    parser.add_argument("--use_habana", action="store_true", help="Use HPU.")
-    parser.add_argument(
-        "--use_hpu_graphs", action="store_true", help="Use HPU graphs on HPU. This should lead to faster generations."
-    )
-    args = parser.parse_args()
-    # Set seed before running the model
-    if args.seed:
-        logger.info("Set the random seed {}!".format(args.seed))
-        set_seed(args.seed)
-
-    kwargs = {
-        "use_habana": args.use_habana,
-        "use_hpu_graphs": args.use_hpu_graphs,
-        "gaudi_config": args.gaudi_config,
-        "torch_dtype": torch.bfloat16 if args.bf16 else None
-    }
-    print(f"kwargs={kwargs}")
-    pipeline = GaudiStableDiffusionPipeline.from_pretrained(
-        args.model_name_or_path, use_safetensors=True, **kwargs
-    )
-    distributed_state = PartialState()
-    kwargs = {
-        "num_images_per_prompt": args.num_images_per_prompt
-    }
-    with distributed_state.split_between_processes(args.prompts) as prompt:
-        outputs = pipeline(prompt, **kwargs)
-        for i, image in enumerate(outputs.images):
-            image.save(f"result_{distributed_state.process_index}_{i}.png")
-
-if __name__ == "__main__":
-    main()

From d3fe325be17ca927515930f5b3edb52cb8ddce8a Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Tue, 7 May 2024 03:48:05 +0000
Subject: [PATCH 04/11] Change the command format

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index b30ab4133..c4a3b9c5f 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -62,7 +62,7 @@ python text_to_image_generation.py \
 
 ### Distributed inference with multiple HPUs
 Here is how to generate images with two prompts on two HPUs:
-```python
+```bash
 python ../gaudi_spawn.py \
     --world_size 2 text_to_image_generation.py \
     --model_name_or_path runwayml/stable-diffusion-v1-5 \
@@ -191,7 +191,7 @@ python text_to_image_generation.py \
 ```
 
 Here is how to distributed generate SDXL images with two prompts on two HPUs:
-```python
+```bash
 python ../gaudi_spawn.py \
     --world_size 2 text_to_image_generation.py \
     --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \

From f2539354e861f3a272382636aeeb2660cdc007ac Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Wed, 8 May 2024 01:34:24 +0000
Subject: [PATCH 05/11] make style reformat

Signed-off-by: yuanwu
---
 examples/stable-diffusion/text_to_image_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index abdd95497..b72657021 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -312,7 +312,7 @@
         "eta": args.eta,
         "output_type": args.output_type,
         "profiling_warmup_steps": args.profiling_warmup_steps,
-        "profiling_steps": args.profiling_steps
+        "profiling_steps": args.profiling_steps,
     }
 
     # Generate images

From 8dd07f079a729f6f034e9ce56dae0302d44d8a54 Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Wed, 8 May 2024 08:37:49 +0000
Subject: [PATCH 06/11] Add the LDM3D distributed inference command

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index c4a3b9c5f..592d2f2ad 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -126,6 +126,23 @@ python text_to_image_generation.py \
     --gaudi_config Habana/stable-diffusion-2 \
     --ldm3d
 ```
+Here is how to generate images and depth maps with two prompts on two HPUs:
+```bash
+python ../gaudi_spawn.py \
+    --world_size 2 text_to_image_generation.py \
+    --model_name_or_path "Intel/ldm3d-4c" \
+    --prompts "An image of a squirrel in Picasso style" "A shiny flying horse taking off" \
+    --num_images_per_prompt 10 \
+    --batch_size 2 \
+    --height 768 \
+    --width 768 \
+    --image_save_dir /tmp/stable_diffusion_images \
+    --use_habana \
+    --use_hpu_graphs \
+    --gaudi_config Habana/stable-diffusion-2 \
+    --ldm3d \
+    --distributed
+```
 
 > There are three different checkpoints for LDM3D:
 > - use [original checkpoint](https://huggingface.co/Intel/ldm3d) to generate outputs from the paper
@@ -190,7 +207,7 @@ python text_to_image_generation.py \
     --bf16
 ```
 
-Here is how to distributed generate SDXL images with two prompts on two HPUs:
+Here is how to generate SDXL images with two prompts on two HPUs:
 ```bash
 python ../gaudi_spawn.py \
     --world_size 2 text_to_image_generation.py \

From c70614616681ba85742ed0ed187cd139fa90a1ab Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Thu, 9 May 2024 02:22:29 +0000
Subject: [PATCH 07/11] align arguments of test command

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index 592d2f2ad..4d2543f30 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -68,13 +68,12 @@ python ../gaudi_spawn.py \
     --model_name_or_path runwayml/stable-diffusion-v1-5 \
     --prompts "An image of a squirrel in Picasso style" "A shiny flying horse taking off" \
     --num_images_per_prompt 20 \
-    --batch_size 8 \
+    --batch_size 4 \
     --image_save_dir /tmp/stable_diffusion_images \
     --use_habana \
     --use_hpu_graphs \
     --gaudi_config Habana/stable-diffusion \
     --bf16 \
-    --seed 92 \
     --distributed
 ```

From e3ad3309b61472bfbcae3b157124b5b23f97a33a Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Mon, 17 Jun 2024 02:16:25 +0000
Subject: [PATCH 08/11] refine the patch

Signed-off-by: yuanwu
---
 examples/stable-diffusion/text_to_image_generation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index d2d295046..164adac65 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -336,9 +336,10 @@
         "output_type": args.output_type,
         "profiling_warmup_steps": args.profiling_warmup_steps,
         "profiling_steps": args.profiling_steps,
-        "throughput_warmup_steps": args.throughput_warmup_steps,
     }
 
+    if args.throughput_warmup_steps is not None:
+        infer_kwargs["throughput_warmup_steps"] = args.throughput_warmup_steps
 
     # Generate images
     if args.control_image is not None:

From 54e9b0dbe6ee1b074335a2a56061a7e2eab7ffa1 Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Mon, 17 Jun 2024 02:18:13 +0000
Subject: [PATCH 09/11] Fix errors of make style

Signed-off-by: yuanwu
---
 examples/stable-diffusion/text_to_image_generation.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index 164adac65..70280666f 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -398,14 +398,13 @@
         )
         pipeline.text_encoder = pipeline.text_encoder.merge_and_unload()
         set_seed(args.seed)
-    
+
     if args.distributed:
         with distributed_state.split_between_processes(args.prompts) as prompt:
             outputs = pipeline(prompt=prompt, **infer_kwargs, **res)
     else:
         outputs = pipeline(prompt=args.prompts, **infer_kwargs, **res)
-
     # Save the pipeline in the specified directory if not None
     if args.pipeline_save_dir is not None:

From 861bec895c1654c2d4f515b939a3154c2b827624 Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Sun, 23 Jun 2024 11:57:54 +0000
Subject: [PATCH 10/11] Remove the empty lines

Signed-off-by: yuanwu
---
 examples/stable-diffusion/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index 0033770a8..1ac476120 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -225,8 +225,6 @@ python ../gaudi_spawn.py \
     --bf16 \
     --distributed
 ```
-
-
 > HPU graphs are recommended when generating images by batches to get the fastest possible generations.
 > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster.
 > You can enable this mode with `--use_hpu_graphs`.

From 8993c64a7a800c29133029e2f22a22c33b765eaa Mon Sep 17 00:00:00 2001
From: yuanwu
Date: Tue, 25 Jun 2024 12:33:26 +0000
Subject: [PATCH 11/11] Use kwargs_call

Signed-off-by: yuanwu
---
 .../text_to_image_generation.py | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index 70280666f..9dd8fe51a 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -243,10 +243,10 @@
     args = parser.parse_args()
 
     # Set image resolution
-    res = {}
+    kwargs_call = {}
     if args.width > 0 and args.height > 0:
-        res["width"] = args.width
-        res["height"] = args.height
+        kwargs_call["width"] = args.width
+        kwargs_call["height"] = args.height
 
     # ControlNet
     if args.control_image is not None:
@@ -326,7 +326,7 @@
             with distributed_state.split_between_processes(args.negative_prompts) as negative_prompt:
                 negative_prompts = negative_prompt
 
-    infer_kwargs = {
+    kwargs_common = {
         "num_images_per_prompt": args.num_images_per_prompt,
         "batch_size": args.batch_size,
         "num_inference_steps": args.num_inference_steps,
@@ -338,8 +338,9 @@
         "profiling_steps": args.profiling_steps,
     }
 
+    kwargs_call.update(kwargs_common)
     if args.throughput_warmup_steps is not None:
-        infer_kwargs["throughput_warmup_steps"] = args.throughput_warmup_steps
+        kwargs_call["throughput_warmup_steps"] = args.throughput_warmup_steps
 
     # Generate images
     if args.control_image is not None:
@@ -355,7 +356,7 @@
 
         # Set seed before running the model
         set_seed(args.seed)
-        infer_kwargs["image"] = control_image
+        kwargs_call["image"] = control_image
 
     elif sdxl:
         pipeline = GaudiStableDiffusionXLPipeline.from_pretrained(
@@ -377,8 +378,8 @@
             with distributed_state.split_between_processes(args.negative_prompts_2) as negative_prompt_2:
                 negative_prompts_2 = negative_prompt_2
 
-        infer_kwargs["prompt_2"] = prompts_2
-        infer_kwargs["negative_prompt_2"] = negative_prompts_2
+        kwargs_call["prompt_2"] = prompts_2
+        kwargs_call["negative_prompt_2"] = negative_prompts_2
 
     else:
         pipeline = GaudiStableDiffusionPipeline.from_pretrained(
@@ -401,9 +402,9 @@
 
     if args.distributed:
         with distributed_state.split_between_processes(args.prompts) as prompt:
-            outputs = pipeline(prompt=prompt, **infer_kwargs, **res)
+            outputs = pipeline(prompt=prompt, **kwargs_call)
     else:
-        outputs = pipeline(prompt=args.prompts, **infer_kwargs, **res)
+        outputs = pipeline(prompt=args.prompts, **kwargs_call)
 
     # Save the pipeline in the specified directory if not None
     if args.pipeline_save_dir is not None:
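The mechanism every patch in this series builds on is Accelerate's `PartialState.split_between_processes`, which hands each spawned process its own slice of the prompt list. Below is a minimal standalone sketch of that behavior; it is illustrative only and not part of the patches above (the file name `sketch.py` is hypothetical, and it assumes `accelerate` is installed and two processes are launched):

```python
# Illustrative sketch, not part of the patch series: how Accelerate's
# PartialState.split_between_processes shards prompts across processes.
# Launch with two processes, e.g. `python ../gaudi_spawn.py --world_size 2 sketch.py`
# (sketch.py is a hypothetical file name used only for this example).
from accelerate import PartialState

prompts = ["a cat", "a dog"]  # two prompts, as in the PATCH 01 example

state = PartialState()
# Each process receives its own slice of the list: with two processes,
# rank 0 sees ["a cat"] and rank 1 sees ["a dog"], so each HPU generates
# images for one prompt and saves results under its own process index.
with state.split_between_processes(prompts) as my_prompts:
    print(f"process {state.process_index}/{state.num_processes}: {my_prompts}")
```

This is why the scripts in the series save outputs with `distributed_state.process_index` in the file or directory name: each rank holds a different slice, and distinct paths keep the per-process results from overwriting each other.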