diff --git a/aa_torch_fx.py b/aa_torch_fx.py
deleted file mode 100644
index 339d33d1598..00000000000
--- a/aa_torch_fx.py
+++ /dev/null
@@ -1,456 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import copy
-import re
-import subprocess
-import time
-import warnings
-from itertools import islice
-from pathlib import Path
-
-import numpy as np
-import openvino as ov
-import openvino.torch  # noqa
-import pandas as pd
-import torch
-import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
-import torchvision.models as models
-from sklearn.metrics import accuracy_score
-from torch._export import capture_pre_autograd_graph
-from torch.ao.quantization.quantize_pt2e import convert_pt2e
-from torch.ao.quantization.quantize_pt2e import prepare_pt2e
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
-from torch.fx.passes.graph_drawer import FxGraphDrawer
-from torch.jit import TracerWarning
-from torchao.utils import benchmark_model as ao_benchmark_model
-from torchvision import datasets
-from transformers import AutoImageProcessor
-from transformers import AutoModelForImageClassification
-
-import nncf
-from nncf.common.logging.track_progress import track
-from nncf.common.quantization.structs import QuantizationPreset  # noqa
-from nncf.parameters import ModelType
-from nncf.torch.dynamic_graph.patch_pytorch import disable_patching
-
-warnings.filterwarnings("ignore", category=TracerWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
-
-DATASET_IMAGENET = "/home/dlyakhov/datasets/imagenet/val"
-
-hf_models = ()
-
-
-def hf_model_builder(model_id: str):
-    def build(weights):
-        processor = AutoImageProcessor.from_pretrained(model_id)
-        model = AutoModelForImageClassification.from_pretrained(model_id)
-
-        class ModelWithProcessing(torch.nn.Module):
-            def __init__(self, processor, model):
-                super().__init__()
-                self.processor = processor
-                self.model = model
-
-            def forward(self, x):
-                processed_input = processor(x, return_tensors="pt")
-                return model(processed_input)
-
-        # return ModelWithProcessing(processor, model)
-        return model
-
-    class DummyWeights:
-        def transforms(self):
-            return models.ResNet18_Weights.DEFAULT.transforms()
-
-        @property
-        def meta(self):
-            return {}
-
-    return build, DummyWeights()
-
-
-MODELS_DICT = {
-    "vit_h_14": (models.vit_h_14, models.ViT_H_14_Weights.DEFAULT),
-    "vit_b_16": (models.vit_b_16, models.ViT_B_16_Weights.DEFAULT),
-    "swin_v2_t": (models.swin_v2_t, models.Swin_V2_T_Weights.DEFAULT),
-    "swin_v2_s": (models.swin_v2_s, models.Swin_V2_S_Weights.DEFAULT),
-    "resnet18": (models.resnet18, models.ResNet18_Weights.DEFAULT),
-    "resnet50": (models.resnet50, models.ResNet50_Weights.DEFAULT),
-    "mobilenet_v2": (models.mobilenet_v2, models.MobileNet_V2_Weights.DEFAULT),
-    "mobilenet_v3_small": (models.mobilenet_v3_small, models.MobileNet_V3_Small_Weights.DEFAULT),
-    "mobilenet_v3_large": (models.mobilenet_v3_large, models.MobileNet_V3_Large_Weights.DEFAULT),
-    # "densenet161": (models.densenet161, models.DenseNet161_Weights.DEFAULT),
-    "vgg16": (models.vgg16, models.VGG16_Weights.DEFAULT),
-    "efficientnet_b7": (models.efficientnet_b7, models.EfficientNet_B7_Weights.DEFAULT),
-    "inception_v3": (models.inception_v3, models.Inception_V3_Weights.DEFAULT),
-    "regnet_x_32gf": (models.regnet_x_32gf, models.RegNet_X_32GF_Weights.DEFAULT),
-    # "google/vit-base-patch16-224": hf_model_builder("google/vit-base-patch16-224"),
-    # "convnext_large": (models.convnext_large, models.ConvNeXt_Large_Weights.DEFAULT),
-    # "convnext_small": (models.convnext_small, models.ConvNeXt_Small_Weights.DEFAULT),
-}
-
-
-def measure_time(model, example_inputs, num_iters=1000):
-    with torch.no_grad():
-        model(*example_inputs)
-        total_time = 0
-        for i in range(0, num_iters):
-            start_time = time.time()
-            model(*example_inputs)
-            total_time += time.time() - start_time
-        average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def measure_time_ov(model, example_inputs, num_iters=1000):
-    ie = ov.Core()
-    compiled_model = ie.compile_model(model, "CPU")
-    infer_request = compiled_model.create_infer_request()
-    infer_request.infer(example_inputs)
-    total_time = 0
-    for i in range(0, num_iters):
-        start_time = time.time()
-        infer_request.infer(example_inputs)
-        total_time += time.time() - start_time
-    average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def quantize(model, example_inputs, calibration_dataset, subset_size=300):
-    with torch.no_grad():
-        exported_model = capture_pre_autograd_graph(model, example_inputs)
-
-    quantizer = X86InductorQuantizer()
-    quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
-
-    prepared_model = prepare_pt2e(exported_model, quantizer)
-    from tqdm import tqdm
-
-    for inp, _ in islice(tqdm(calibration_dataset), subset_size):
-        prepared_model(inp)
-    converted_model = convert_pt2e(prepared_model)
-    return converted_model
-
-
-def validate(model, val_loader, subset_size=None):
-    dataset_size = len(val_loader)
-
-    predictions = np.zeros((dataset_size))
-    references = -1 * np.ones((dataset_size))
-
-    with track(total=dataset_size, description="Validation") as pbar:
-
-        for i, (images, target) in enumerate(val_loader):
-            if subset_size is not None and i >= subset_size:
-                break
-
-            output_data = model(images).detach().numpy()
-            predicted_label = np.argmax(output_data, axis=1)
-            predictions[i] = predicted_label.item()
-            references[i] = target
-            pbar.progress.update(pbar.task, advance=1)
-    acc_top1 = accuracy_score(predictions, references) * 100
-    print(acc_top1)
-    return acc_top1
-
-
-def validate_ov(model, val_loader):
-    dataset_size = len(val_loader)
-
-    # Initialize result tensors for async inference support.
-    predictions = np.zeros((dataset_size))
-    references = -1 * np.ones((dataset_size))
-
-    core = ov.Core()
-    compiled_model = core.compile_model(model)
-
-    infer_queue = ov.AsyncInferQueue(compiled_model, 4)
-    with track(total=dataset_size, description="Validation") as pbar:
-
-        def process_result(request, userdata):
-            output_data = request.get_output_tensor().data
-            predicted_label = np.argmax(output_data, axis=1)
-            predictions[userdata] = predicted_label.item()
-            pbar.progress.update(pbar.task, advance=1)
-
-        infer_queue.set_callback(process_result)
-
-        for i, (images, target) in enumerate(val_loader):
-            # W/A for memory leaks when using torch DataLoader and OpenVINO
-            image_copies = copy.deepcopy(images.numpy())
-            infer_queue.start_async(image_copies, userdata=i)
-            references[i] = target
-
-        infer_queue.wait_all()
-
-    acc_top1 = accuracy_score(predictions, references) * 100
-    print(acc_top1)
-    return acc_top1
-
-
-def run_benchmark(model_path: Path, shape) -> float:
-    command = f"benchmark_app -m {model_path} -d CPU -api async -t 15"
-    command += f' -shape="[{",".join(str(x) for x in shape)}]"'
-    cmd_output = subprocess.check_output(command, shell=True)  # nosec
-    match = re.search(r"Throughput\: (.+?) FPS", str(cmd_output))
-    return float(match.group(1))
-
-
-def torch_ao_sq_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input):
-    import torch
-    from torchao.quantization.smoothquant import smooth_fq_linear_to_inference
-    from torchao.quantization.smoothquant import swap_linear_with_smooth_fq_linear
-
-    # Fuse the int8*int8 -> int32 matmul and subsequent mul op avoiding materialization of the int32 intermediary tensor
-    torch._inductor.config.force_fuse_int_mm_with_mul = True
-
-    # plug in your model
-    # model = torch.compile(pt_model)
-    model = pt_model
-
-    # convert linear modules to smoothquant
-    # linear module in calibration mode
-    swap_linear_with_smooth_fq_linear(model)
-
-    # Create a data loader for calibration
-    calibration_loader = val_loader
-
-    # Calibrate the model
-    model.train()
-    from tqdm import tqdm
-
-    for batch in tqdm(islice(calibration_loader, 300)):
-        inputs = batch[0]
-        model(inputs)
-
-    # set it to inference mode
-    smooth_fq_linear_to_inference(model)
-
-    # compile the model to improve performance
-    model = torch.compile(model, mode="max-autotune")
-    acc1_quant_model = validate(model, val_loader)
-    print(f"torch ao metric acc@1: {acc1_quant_model}")
-    result["torch_ao_quant_model_acc"] = acc1_quant_model
-
-    latency = ao_benchmark_model(model, 20, example_input)
-    print(f"torch ao latency: {latency}")
-    result["torch_ao_quant_model_latency"] = latency
-
-
-def nncf_fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input):
-    with disable_patching():
-        with torch.no_grad():
-            exported_model = capture_pre_autograd_graph(pt_model, (example_input,))
-
-    def transform(x):
-        return x[0]
-
-    quant_fx_model = nncf.quantize(
-        exported_model, nncf.Dataset(val_loader, transform_func=transform), model_type=ModelType.TRANSFORMER
-    )
-    quant_compile_model = torch.compile(quant_fx_model, backend="openvino")
-
-    # acc1_quant_model = validate(quant_compile_model, val_loader)
-    acc1_quant_model = -1.0
-    latency_fx = measure_time(quant_compile_model, (example_input,))
-    print(f"latency: {latency_fx}")
-    result["acc1_nncf_fx_quant_model"] = acc1_quant_model
-    result["torch_compile_ov_latency_nncf_fx_quant_model"] = latency_fx
-
-    g = FxGraphDrawer(quant_compile_model, f"b_nncf_{pt_model.__class__.__name__}_int8")
g.get_dot_graph().write_svg(f"b_nncf_{pt_model.__class__.__name__}_int8.svg") - - # EXPORT TO OV - exported_model = torch.export.export(quant_compile_model, (example_input,)) - ov_quant_model = ov.convert_model(exported_model, example_input=example_input) - quant_file_path = output_dir / "quant.xml" - ov.save_model(ov_quant_model, quant_file_path) - - fps = run_benchmark(quant_file_path, shape_input) - print(f"fps: {fps}") - result["ov_fps_nncf_fx_quant_model"] = fps - - -def fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input): - with disable_patching(): - fp32_pt_model = copy.deepcopy(pt_model) - fp32_compile_model = torch.compile(fp32_pt_model, backend="openvino") - - quant_pt_model = quantize(fp32_compile_model, (example_input,), val_loader) - quant_compile_model = torch.compile(quant_pt_model, backend="openvino") - - g = FxGraphDrawer(quant_pt_model, f"b_pt_{pt_model.__class__.__name__}_int8") - g.get_dot_graph().write_svg(f"b_pt_{pt_model.__class__.__name__}_int8.svg") - - acc1_quant_model = validate(quant_compile_model, val_loader) - result["acc1_quant_model"] = acc1_quant_model - - latency_fx = measure_time(quant_compile_model, (example_input,)) - print(f"latency: {latency_fx}") - result["torch_compile_latency_fps_quant_model"] = latency_fx - - -def nncf_pt_2_ov_quantization(pt_model, val_loader, example_input, output_dir, result, shape_input): - def transform(x): - return x[0] - - nncf_model = nncf.quantize(copy.deepcopy(pt_model), nncf.Dataset(val_loader, transform_func=transform)) - - ov_nncf_model = ov.convert_model(nncf_model, example_input=example_input) - nncf_pt_file_path = output_dir / "nncf_pt.xml" - ov.save_model(ov_nncf_model, nncf_pt_file_path) - acc1_nncf_pt = validate_ov(ov_nncf_model, val_loader) - result["acc1_nncf_pt"] = acc1_nncf_pt - fps = run_benchmark(nncf_pt_file_path, shape_input) - print(f"fps: {fps}") - result["ov_fps_nncf_pt"] = fps - - -def nncf_ov_2_ov_quantization(ov_fp32_model, val_loader, output_dir, result, shape_input): - def transform(x): - return np.array(x[0]) - - from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters - from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters - - advanced_params = AdvancedQuantizationParameters() - # for sq_param in [-1, 0.15, 0.5, 0.75]: - for sq_param in [0.95]: - advanced_params.smooth_quant_alphas = AdvancedSmoothQuantParameters(matmul=sq_param) - - from copy import deepcopy - - fast_bias_correction = True - nncf_ov_int8_model = nncf.quantize( - deepcopy(ov_fp32_model), - nncf.Dataset(val_loader, transform_func=transform), - fast_bias_correction=fast_bias_correction, - model_type=ModelType.TRANSFORMER, - preset=QuantizationPreset.MIXED, - advanced_parameters=advanced_params, - ) - acc1_nncf_ov = validate_ov(nncf_ov_int8_model, val_loader) - result[f"acc1_nncf_ov_{sq_param}"] = acc1_nncf_ov - for precision, model in (("int8", nncf_ov_int8_model), ("fp32", ov_fp32_model)): - nncf_ov_file_path = output_dir / f"nncf_ov_{precision}.xml" - ov.save_model(model, nncf_ov_file_path) - fps = run_benchmark(nncf_ov_file_path, shape_input) - print(f"fps_{precision}: {fps} {sq_param}") - result[f"ov_fps_nncf_ov_{precision}_{sq_param}"] = fps - - latency = measure_time_ov(model, next(iter(val_loader))[0], num_iters=10_000) - print(f"latency_{precision}: {latency}") - result[f"ov_latency_nncf_ov_{precision}_{sq_param}"] = latency - - -def process_model(model_name: str): - - result = {"name": model_name} - model_cls, model_weights = 
-    output_dir = Path("models") / model_name
-    output_dir.mkdir(exist_ok=True)
-    ##############################################################
-    # Prepare dataset
-    ##############################################################
-
-    val_dataset = datasets.ImageFolder(root=DATASET_IMAGENET, transform=model_weights.transforms())
-    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)
-
-    ##############################################################
-    # Prepare original model
-    ##############################################################
-
-    pt_model = model_cls(weights=model_weights)
-    pt_model = pt_model.eval()
-    example_input = next(iter(val_loader))[0]
-    shape_input = list(example_input.shape)
-    ##############################################################
-    # Process FP32 Model
-    ##############################################################
-
-    fp32_pt_model = copy.deepcopy(pt_model)
-
-    orig_infer_acc1 = model_weights.meta.get("_metrics", {}).get("ImageNet-1K", {}).get("acc@1")
-    print(f"fp32 model metric: {orig_infer_acc1}")
-    # orig_infer_acc1 = validate(fp32_pt_model, val_loader)
-    result["acc1_fp32_openvino"] = orig_infer_acc1
-
-    fp32_pt_model = torch.export.export(fp32_pt_model, (example_input,))
-    ov_fp32_model = ov.convert_model(fp32_pt_model, example_input=example_input)
-    ov_fp32_file_path = None
-    ov_fp32_file_path = output_dir / "fp32.xml"
-    ov.save_model(ov_fp32_model, ov_fp32_file_path)
-    # result["fps_fp32_openvino"] = run_benchmark(ov_fp32_file_path, shape_input)
-    # print(f"fps_fp32_openvino {result['fps_fp32_openvino']}")
-
-    del fp32_pt_model
-    ##############################################################
-    # Process Torch AO Quantize with SQ
-    ##############################################################
-    # torch_ao_sq_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # with torch.no_grad():
-    #     exported_model = capture_pre_autograd_graph(pt_model, (example_input,))
-    #     latency_fx = measure_time(torch.compile(exported_model), (example_input,))
-    #     print(f"latency: {latency_fx}")
-    #############################################################
-
-    ##############################################################
-    # Process PT Quantize
-    ##############################################################
-    fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # Process NNCF FX Quantize
-    ##############################################################
-    # nncf_fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # Process NNCF Quantize by PT
-    ##############################################################
-    # nncf_pt_2_ov_quantization(pt_model, val_loader, example_input, output_dir, result, shape_input)
-
-    ##############################################################
-    # Process NNCF Quantize by OV
-    ##############################################################
-    # nncf_ov_2_ov_quantization(ov_fp32_model, val_loader, output_dir, result, shape_input)
-
-    print(result)
-    return result
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", help="torchvision model name", type=str, default="all")
-    parser.add_argument("--file_name", help="output csv file_name", type=str, default="result.csv")
type=str, default="result.csv") - - args = parser.parse_args() - - results_list = [] - if args.model == "all": - for model_name in MODELS_DICT: - print("---------------------------------------------------") - print(f"name: {model_name}") - results_list.append(process_model(model_name)) - else: - results_list.append(process_model(args.model)) - - df = pd.DataFrame(results_list) - print(df) - df.to_csv(args.file_name) - - -if __name__ == "__main__": - main() diff --git a/examples/llm_compression/openvino/tiny_llama/main.py b/examples/llm_compression/openvino/tiny_llama/main.py index e5f3893f1ab..f2be54ce1aa 100644 --- a/examples/llm_compression/openvino/tiny_llama/main.py +++ b/examples/llm_compression/openvino/tiny_llama/main.py @@ -11,12 +11,12 @@ import time from functools import partial +import datasets import numpy as np import openvino as ov from optimum.intel.openvino import OVModelForCausalLM from transformers import AutoTokenizer -import datasets import nncf diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py index e34b09bc2f9..b3fbce5722b 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py @@ -17,12 +17,12 @@ import numpy as np import openvino as ov +from datasets import load_dataset from optimum.intel import OVModelForCausalLM from transformers import AutoTokenizer from whowhatbench import Evaluator import nncf -from datasets import load_dataset from nncf.common.logging import nncf_logger DataItem = TypeVar("DataItem") diff --git a/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/nncf/quantization/algorithms/min_max/torch_fx_backend.py index c5403386441..bdeed5343c8 100644 --- a/nncf/quantization/algorithms/min_max/torch_fx_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_fx_backend.py @@ -104,7 +104,7 @@ def group_conv_metatypes(self) -> List[OperatorMetatype]: @property def scaled_dot_product_attention_metatypes(self) -> List[OperatorMetatype]: - return [] + return [om.PTScaledDotProductAttentionMetatype] @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: diff --git a/tests/torch/sparsity/movement/helpers/run_recipe.py b/tests/torch/sparsity/movement/helpers/run_recipe.py index 383552932d5..77b3140a967 100644 --- a/tests/torch/sparsity/movement/helpers/run_recipe.py +++ b/tests/torch/sparsity/movement/helpers/run_recipe.py @@ -20,6 +20,7 @@ import torch.nn import torch.nn.functional as F import torch.utils.data +from datasets import Dataset from transformers import AutoModelForAudioClassification from transformers import AutoModelForImageClassification from transformers import AutoModelForSequenceClassification @@ -33,7 +34,6 @@ from transformers import SwinConfig from transformers import Wav2Vec2Config -from datasets import Dataset from nncf import NNCFConfig from nncf.experimental.torch.sparsity.movement.scheduler import MovementSchedulerParams from nncf.torch.dynamic_graph.io_handling import FillerInputElement diff --git a/tests/torch/sparsity/movement/helpers/trainer.py b/tests/torch/sparsity/movement/helpers/trainer.py index 2af37c5b2f4..89ffeb6c865 100644 --- a/tests/torch/sparsity/movement/helpers/trainer.py +++ b/tests/torch/sparsity/movement/helpers/trainer.py @@ -14,6 +14,7 @@ import numpy as np import torch +from datasets import Dataset # pylint: disable=no-name-in-module from transformers import 
 from transformers.trainer import Trainer
 from transformers.trainer_callback import TrainerCallback
@@ -21,7 +22,6 @@
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-from datasets import Dataset  # pylint: disable=no-name-in-module
 from nncf.api.compression import CompressionAlgorithmController
 from nncf.common.compression import BaseCompressionAlgorithmController
 from nncf.common.utils.tensorboard import prepare_for_tensorboard
diff --git a/tests/torch/sparsity/movement/test_model_saving.py b/tests/torch/sparsity/movement/test_model_saving.py
index c7949afeb82..27a9655591a 100644
--- a/tests/torch/sparsity/movement/test_model_saving.py
+++ b/tests/torch/sparsity/movement/test_model_saving.py
@@ -18,6 +18,7 @@
 import pytest
 import torch
 from addict import Dict
+from datasets import Dataset
 from onnx import numpy_helper
 from openvino._offline_transformations import apply_fused_names_cleanup
 from openvino._offline_transformations import apply_moc_transformations
@@ -28,7 +29,6 @@
 from scipy.special import softmax
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-from datasets import Dataset
 from nncf.torch import create_compressed_model
 from nncf.torch.checkpoint_loading import load_state
 from tests.torch.helpers import PTTensorListComparator
diff --git a/tests/torch/sparsity/movement/training_scripts/run_glue.py b/tests/torch/sparsity/movement/training_scripts/run_glue.py
index d0f5b14269e..360832a5bb7 100644
--- a/tests/torch/sparsity/movement/training_scripts/run_glue.py
+++ b/tests/torch/sparsity/movement/training_scripts/run_glue.py
@@ -12,13 +12,12 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 
+import datasets
 import evaluate
 import jstyleson
 import numpy as np
 from transformers.training_args import ParallelMode
 
-import datasets
-
 # isort: off
 from nncf import NNCFConfig
 from nncf.api.compression import CompressionAlgorithmController
diff --git a/torch_compile_ex_release.py b/torch_compile_ex_release.py
deleted file mode 100644
index 7bd0addf02e..00000000000
--- a/torch_compile_ex_release.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Enable torch inductor freezing feature first
-import os
-
-os.environ["TORCHINDUCTOR_FREEZING"] = "1"
-
-
-import argparse
-import copy
-import time
-from collections import defaultdict
-
-import openvino.torch  # noqa
-import torch
-
-# Optional: using the C++ wrapper instead of default Python wrapper
-import torch._inductor.config as config
-import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
-import torchvision.models as models
-from torch._export import capture_pre_autograd_graph
-from torch.ao.quantization.quantize_pt2e import convert_pt2e
-from torch.ao.quantization.quantize_pt2e import prepare_pt2e
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
-from torch.fx.passes.graph_drawer import FxGraphDrawer
-
-from nncf.experimental.torch_fx.model_transformer import QPARAMPerChannel
-from nncf.experimental.torch_fx.model_transformer import QPARAMSPerTensor
-from nncf.experimental.torch_fx.model_transformer import insert_qdq_to_model
-from nncf.experimental.torch_fx.nncf_graph_builder import GraphConverter  # noqa
-
-
-def get_exported_model_from_nn_module(module, example_inputs):
-    with torch.no_grad():
-        return capture_pre_autograd_graph(module, example_inputs)
-
-
-NNCF_IMPL = True
-
-
-def get_qsetup(exported_model, example_inputs):
-    quantizer = X86InductorQuantizer()
-    quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
-
-    prepared_model = prepare_pt2e(exported_model, quantizer)
-    prepared_model(*example_inputs)
-    converted_model = convert_pt2e(prepared_model)
-    g = FxGraphDrawer(converted_model, "resnet18_int8")
-    g.get_dot_graph().write_svg("resnet18_int8_compiled.svg")
-    qsetup = defaultdict(lambda: dict())
-
-    for node in converted_model.graph.nodes:
-        if "dequantize" in node.name:
-            quantize = node.all_input_nodes[0]
-            # place = "activations"
-            # if len(quantize.all_input_nodes) > 1:
-            #     place = "weights"
-            if "per_tensor" in node.name:
-                params = QPARAMSPerTensor(*node.args[1:])
-            else:
-                params = []
-                for i in range(1, 3):
-                    name = node.args[i].target
-                    params.append(getattr(converted_model, name))
-                params = QPARAMPerChannel(*(params + list(node.args[3:])))
-
-            target_node_name = quantize.all_input_nodes[0].name
-            qsetup[target_node_name] = params
-    return qsetup
-
-
-def quantize(model, example_inputs):
-    if NNCF_IMPL:
-        # Use NNCF here on exported model
-        # to create a quantized model which is compatible with
-        # convert_pt2e function
-        pass
-        # 1. Convert torch.graph to NNCFGraph.
-        # # 2. Analize nncf grpah for SQ/CA
-        # # 3. Collect statistics
-        # # 4. Update params
-        # 5. Analize nncf graph for quantization
-        # 6. Insert observers
-        # 7. prepared_model(*example_inputs)
-        # 8. convert_pt2e(prepared_model)
-        import nncf
-
-        calibration_dataset = nncf.Dataset(example_inputs)
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        quantized_model = nncf.quantize(exported_model, calibration_dataset)
-        g = FxGraphDrawer(quantized_model, "resnet18_quantized_native_nncf")
-        g.get_dot_graph().write_svg("resnet18_quantized_native_nncf.svg")
-        return quantized_model
-
-    else:
-        # g = FxGraphDrawer(exported_model, "resnet18")
-        # g.get_dot_graph().write_svg("resnet18_compiled.svg")
-
-        # MOCK NNCF QUANTIZATION
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        qsetup = get_qsetup(exported_model, example_inputs)
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        exported_model = insert_qdq_to_model(exported_model, qsetup)
-        g = FxGraphDrawer(exported_model, "resnet18_int8")
-        g.get_dot_graph().write_svg("resnet18_int8_compiled_manually.svg")
-        return exported_model
-
-    return None  # converted_model
-
-
-config.cpp_wrapper = True
-
-
-def measure_time(model, example_inputs, num_iters):
-    with torch.no_grad():
-        model(*example_inputs)
-        total_time = 0
-        for i in range(0, num_iters):
-            start_time = time.time()
-            model(*example_inputs)
-            total_time += time.time() - start_time
-        average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def get_dummy_dataset():
-    traced_bs = 1
-    x = torch.randn(traced_bs, 3, 224, 224).contiguous(memory_format=torch.channels_last)
-    example_inputs = (x,)
-    return example_inputs
-
-
-def main_nncf(model_name, num_iters):
-    model = models.__dict__[model_name](pretrained=True)
-    model = model.eval()
-
-    example_inputs = get_dummy_dataset()
-    import nncf
-
-    calibration_dataset = nncf.Dataset(example_inputs)
-    quantized_model = nncf.quantize(model, calibration_dataset)
-
-    import openvino as ov
-
-    ov_model = ov.convert_model(quantized_model.cpu(), example_input=example_inputs[0])
-    ov.serialize(ov_model, "./model_cache_nncf/model.xml")
-
-
-def main(model_name, num_iters):
-    model = models.__dict__[model_name](pretrained=True)
-    model = model.eval()
-
-    example_inputs = get_dummy_dataset()
-
-    converted_model = quantize(copy.deepcopy(model), example_inputs)
-
-    print("original model execution time: ", measure_time(model, example_inputs, num_iters))
-
-    native_optimized_model_fp32 = torch.compile(model)
-    print(
-        "Torch Inductor FP32 model execution time: ",
-        measure_time(native_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    native_optimized_model_int8 = torch.compile(converted_model)
-    print(
-        "Torch Inductor INT8 model execution time: ",
-        measure_time(native_optimized_model_int8, example_inputs, num_iters),
-    )
-
-    ov_optimized_model_fp32 = torch.compile(model, backend="openvino")
-    print(
-        "Torch.compile OpenVINO FP32 model execution time: ",
-        measure_time(ov_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    ov_optimized_model_int8 = torch.compile(
-        converted_model, backend="openvino", options={"model_caching": True, "cache_dir": "./model_cache"}
-    )
-    print(
-        "Torch.compile OpenVINO INT8 model execution time: ",
-        measure_time(ov_optimized_model_int8, example_inputs, num_iters),
-    )
-
-    import intel_extension_for_pytorch  # noqa
-
-    ipex_optimized_model_fp32 = torch.compile(model, backend="ipex")
-    print(
-        "Torch.compile IPEX FP32 model execution time: ",
-        measure_time(ipex_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    ipex_optimized_model_int8 = torch.compile(converted_model, backend="ipex")
-    print(
-        "Torch.compile IPEX INT8 model execution time: ",
-        measure_time(ipex_optimized_model_int8, example_inputs, num_iters),
-    )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--num_iters", help="number of inference iterations", type=int, default=100)
-    parser.add_argument("--model", help="torchvision model name", type=str, default="resnet18")
-    args = parser.parse_args()
-    model_name = args.model
-    num_iters = args.num_iters
-    main(model_name, num_iters)
-    # main_nncf(model_name, num_iters)
diff --git a/yolo_fx_bad_metrics_repro.py b/yolo_fx_bad_metrics_repro.py
deleted file mode 100644
index b5c05d6bbcb..00000000000
--- a/yolo_fx_bad_metrics_repro.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict, Tuple
-
-import numpy as np
-import torch
-from tqdm import tqdm
-from ultralytics.data.utils import check_det_dataset
-from ultralytics.engine.validator import BaseValidator as Validator
-from ultralytics.models.yolo import YOLO
-from ultralytics.utils.torch_utils import de_parallel
-
-
-def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) -> None:
-    mp, mr, map50, mean_ap = (
-        stats["metrics/precision(B)"],
-        stats["metrics/recall(B)"],
-        stats["metrics/mAP50(B)"],
-        stats["metrics/mAP50-95(B)"],
-    )
-    s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Labels", "Precision", "Recall", "mAP@.5", "mAP@.5:.95")
-    print(s)
-    pf = "%20s" + "%12i" * 2 + "%12.3g" * 4  # print format
-    print(pf % ("all", total_images, total_objects, mp, mr, map50, mean_ap))
-
-
-def prepare_validation(model: YOLO, data: str) -> Tuple[Validator, torch.utils.data.DataLoader]:
-    # custom = {"rect": True, "batch": 1}  # method defaults
-    # rect: false forces to resize all input pictures to one size
-    custom = {"rect": False, "batch": 1}  # method defaults
-    args = {**model.overrides, **custom, "mode": "val"}  # highest priority args on the right
-
-    validator = model._smart_load("validator")(args=args, _callbacks=model.callbacks)
-    stride = 32  # default stride
-    validator.stride = stride  # used in get_dataloader() for padding
-    validator.data = check_det_dataset(data)
-    validator.init_metrics(de_parallel(model))
-
-    data_loader = validator.get_dataloader(validator.data.get(validator.args.split), validator.args.batch)
-    return validator, data_loader
-
-
-def validate(model, data_loader: torch.utils.data.DataLoader, validator: Validator) -> Tuple[Dict, int, int]:
-    with torch.no_grad():
-        for batch in data_loader:
-            batch = validator.preprocess(batch)
-            preds = model(batch["img"])
-            preds = validator.postprocess(preds)
-            validator.update_metrics(preds, batch)
-    stats = validator.get_stats()
-    return stats, validator.seen, validator.nt_per_class.sum()
-
-
-def main(torch_fx):
-    # ultralytics @ git+https://github.com/THU-MIG/yolov10.git@2c36ab0f108efdd17c7e290564bb845ccb6844d8
-    # pip install git+https://github.com/THU-MIG/yolov10.git
-    # pip install huggingface-hub
-    # yolo_model = YOLO("yolov10n.pt")
YOLO("yolov10n.pt") - - yolo_model = YOLO("yolov8n") - - model_type = "torch" - model = yolo_model.model - if torch_fx: - model = torch.compile(model) - model_type = "FX" - print(f"FP32 {model_type} model validation results:") - validator, data_loader = prepare_validation(yolo_model, "coco128.yaml") - stats, total_images, total_objects = validate(model, tqdm(data_loader), validator) - print_statistics(stats, total_images, total_objects) - - -if __name__ == "__main__": - print("Torch model:") - main(torch_fx=False) - print("Torch FX model:") - main(torch_fx=True)