diff --git a/aa_torch_fx.py b/aa_torch_fx.py
deleted file mode 100644
index 339d33d1598..00000000000
--- a/aa_torch_fx.py
+++ /dev/null
@@ -1,456 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import copy
-import re
-import subprocess
-import time
-import warnings
-from itertools import islice
-from pathlib import Path
-
-import numpy as np
-import openvino as ov
-import openvino.torch  # noqa
-import pandas as pd
-import torch
-import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
-import torchvision.models as models
-from sklearn.metrics import accuracy_score
-from torch._export import capture_pre_autograd_graph
-from torch.ao.quantization.quantize_pt2e import convert_pt2e
-from torch.ao.quantization.quantize_pt2e import prepare_pt2e
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
-from torch.fx.passes.graph_drawer import FxGraphDrawer
-from torch.jit import TracerWarning
-from torchao.utils import benchmark_model as ao_benchmark_model
-from torchvision import datasets
-from transformers import AutoImageProcessor
-from transformers import AutoModelForImageClassification
-
-import nncf
-from nncf.common.logging.track_progress import track
-from nncf.common.quantization.structs import QuantizationPreset  # noqa
-from nncf.parameters import ModelType
-from nncf.torch.dynamic_graph.patch_pytorch import disable_patching
-
-warnings.filterwarnings("ignore", category=TracerWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
-
-DATASET_IMAGENET = "/home/dlyakhov/datasets/imagenet/val"
-
-hf_models = ()
-
-
-def hf_model_builder(model_id: str):
-    def build(weights):
-        processor = AutoImageProcessor.from_pretrained(model_id)
-        model = AutoModelForImageClassification.from_pretrained(model_id)
-
-        class ModelWithProcessing(torch.nn.Module):
-            def __init__(self, processor, model):
-                super().__init__()
-                self.processor = processor
-                self.model = model
-
-            def forward(self, x):
-                processed_input = processor(x, return_tensors="pt")
-                return model(processed_input)
-
-        # return ModelWithProcessing(processor, model)
-        return model
-
-    class DummyWeights:
-        def transforms(self):
-            return models.ResNet18_Weights.DEFAULT.transforms()
-
-        @property
-        def meta(self):
-            return {}
-
-    return build, DummyWeights()
-
-
-MODELS_DICT = {
-    "vit_h_14": (models.vit_h_14, models.ViT_H_14_Weights.DEFAULT),
-    "vit_b_16": (models.vit_b_16, models.ViT_B_16_Weights.DEFAULT),
-    "swin_v2_t": (models.swin_v2_t, models.Swin_V2_T_Weights.DEFAULT),
-    "swin_v2_s": (models.swin_v2_s, models.Swin_V2_S_Weights.DEFAULT),
-    "resnet18": (models.resnet18, models.ResNet18_Weights.DEFAULT),
-    "resnet50": (models.resnet50, models.ResNet50_Weights.DEFAULT),
-    "mobilenet_v2": (models.mobilenet_v2, models.MobileNet_V2_Weights.DEFAULT),
-    "mobilenet_v3_small": (models.mobilenet_v3_small, models.MobileNet_V3_Small_Weights.DEFAULT),
-    "mobilenet_v3_large": (models.mobilenet_v3_large, models.MobileNet_V3_Large_Weights.DEFAULT),
-    # "densenet161": (models.densenet161, models.DenseNet161_Weights.DEFAULT),
-    "vgg16": (models.vgg16, models.VGG16_Weights.DEFAULT),
-    "efficientnet_b7": (models.efficientnet_b7, models.EfficientNet_B7_Weights.DEFAULT),
-    "inception_v3": (models.inception_v3, models.Inception_V3_Weights.DEFAULT),
-    "regnet_x_32gf": (models.regnet_x_32gf, models.RegNet_X_32GF_Weights.DEFAULT),
-    # "google/vit-base-patch16-224": hf_model_builder("google/vit-base-patch16-224"),
-    # "convnext_large": (models.convnext_large, models.ConvNeXt_Large_Weights.DEFAULT),
-    # "convnext_small": (models.convnext_small, models.ConvNeXt_Small_Weights.DEFAULT),
-}
-
-
-def measure_time(model, example_inputs, num_iters=1000):
-    with torch.no_grad():
-        model(*example_inputs)
-        total_time = 0
-        for i in range(0, num_iters):
-            start_time = time.time()
-            model(*example_inputs)
-            total_time += time.time() - start_time
-        average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def measure_time_ov(model, example_inputs, num_iters=1000):
-    ie = ov.Core()
-    compiled_model = ie.compile_model(model, "CPU")
-    infer_request = compiled_model.create_infer_request()
-    infer_request.infer(example_inputs)
-    total_time = 0
-    for i in range(0, num_iters):
-        start_time = time.time()
-        infer_request.infer(example_inputs)
-        total_time += time.time() - start_time
-    average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def quantize(model, example_inputs, calibration_dataset, subset_size=300):
-    with torch.no_grad():
-        exported_model = capture_pre_autograd_graph(model, example_inputs)
-
-    quantizer = X86InductorQuantizer()
-    quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
-
-    prepared_model = prepare_pt2e(exported_model, quantizer)
-    from tqdm import tqdm
-
-    for inp, _ in islice(tqdm(calibration_dataset), subset_size):
-        prepared_model(inp)
-    converted_model = convert_pt2e(prepared_model)
-    return converted_model
-
-
-def validate(model, val_loader, subset_size=None):
-    dataset_size = len(val_loader)
-
-    predictions = np.zeros((dataset_size))
-    references = -1 * np.ones((dataset_size))
-
-    with track(total=dataset_size, description="Validation") as pbar:
-
-        for i, (images, target) in enumerate(val_loader):
-            if subset_size is not None and i >= subset_size:
-                break
-
-            output_data = model(images).detach().numpy()
-            predicted_label = np.argmax(output_data, axis=1)
-            predictions[i] = predicted_label.item()
-            references[i] = target
-            pbar.progress.update(pbar.task, advance=1)
-    acc_top1 = accuracy_score(predictions, references) * 100
-    print(acc_top1)
-    return acc_top1
-
-
-def validate_ov(model, val_loader):
-    dataset_size = len(val_loader)
-
-    # Initialize result tensors for async inference support.
-    predictions = np.zeros((dataset_size))
-    references = -1 * np.ones((dataset_size))
-
-    core = ov.Core()
-    compiled_model = core.compile_model(model)
-
-    infer_queue = ov.AsyncInferQueue(compiled_model, 4)
-    with track(total=dataset_size, description="Validation") as pbar:
-
-        def process_result(request, userdata):
-            output_data = request.get_output_tensor().data
-            predicted_label = np.argmax(output_data, axis=1)
-            predictions[userdata] = predicted_label.item()
-            pbar.progress.update(pbar.task, advance=1)
-
-        infer_queue.set_callback(process_result)
-
-        for i, (images, target) in enumerate(val_loader):
-            # W/A for memory leaks when using torch DataLoader and OpenVINO
-            image_copies = copy.deepcopy(images.numpy())
-            infer_queue.start_async(image_copies, userdata=i)
-            references[i] = target
-
-        infer_queue.wait_all()
-
-    acc_top1 = accuracy_score(predictions, references) * 100
-    print(acc_top1)
-    return acc_top1
-
-
-def run_benchmark(model_path: Path, shape) -> float:
-    command = f"benchmark_app -m {model_path} -d CPU -api async -t 15"
-    command += f' -shape="[{",".join(str(x) for x in shape)}]"'
-    cmd_output = subprocess.check_output(command, shell=True)  # nosec
-    match = re.search(r"Throughput\: (.+?) FPS", str(cmd_output))
-    return float(match.group(1))
-
-
-def torch_ao_sq_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input):
-    import torch
-    from torchao.quantization.smoothquant import smooth_fq_linear_to_inference
-    from torchao.quantization.smoothquant import swap_linear_with_smooth_fq_linear
-
-    # Fuse the int8*int8 -> int32 matmul and subsequent mul op avoiding materialization of the int32 intermediary tensor
-    torch._inductor.config.force_fuse_int_mm_with_mul = True
-
-    # plug in your model
-    # model = torch.compile(pt_model)
-    model = pt_model
-
-    # convert linear modules to smoothquant
-    # linear module in calibration mode
-    swap_linear_with_smooth_fq_linear(model)
-
-    # Create a data loader for calibration
-    calibration_loader = val_loader
-
-    # Calibrate the model
-    model.train()
-    from tqdm import tqdm
-
-    for batch in tqdm(islice(calibration_loader, 300)):
-        inputs = batch[0]
-        model(inputs)
-
-    # set it to inference mode
-    smooth_fq_linear_to_inference(model)
-
-    # compile the model to improve performance
-    model = torch.compile(model, mode="max-autotune")
-    acc1_quant_model = validate(model, val_loader)
-    print(f"torch ao metric acc@1: {acc1_quant_model}")
-    result["torch_ao_quant_model_acc"] = acc1_quant_model
-
-    latency = ao_benchmark_model(model, 20, example_input)
-    print(f"torch ao latency: {latency}")
-    result["torch_ao_quant_model_latency"] = latency
-
-
-def nncf_fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input):
-    with disable_patching():
-        with torch.no_grad():
-            exported_model = capture_pre_autograd_graph(pt_model, (example_input,))
-
-    def transform(x):
-        return x[0]
-
-    quant_fx_model = nncf.quantize(
-        exported_model, nncf.Dataset(val_loader, transform_func=transform), model_type=ModelType.TRANSFORMER
-    )
-    quant_compile_model = torch.compile(quant_fx_model, backend="openvino")
-
-    # acc1_quant_model = validate(quant_compile_model, val_loader)
-    acc1_quant_model = -1.0
-    latency_fx = measure_time(quant_compile_model, (example_input,))
-    print(f"latency: {latency_fx}")
-    result["acc1_nncf_fx_quant_model"] = acc1_quant_model
-    result["torch_compile_ov_latency_nncf_fx_quant_model"] = latency_fx
-
-    g = FxGraphDrawer(quant_compile_model, f"b_nncf_{pt_model.__class__.__name__}_int8")
g.get_dot_graph().write_svg(f"b_nncf_{pt_model.__class__.__name__}_int8.svg") - - # EXPORT TO OV - exported_model = torch.export.export(quant_compile_model, (example_input,)) - ov_quant_model = ov.convert_model(exported_model, example_input=example_input) - quant_file_path = output_dir / "quant.xml" - ov.save_model(ov_quant_model, quant_file_path) - - fps = run_benchmark(quant_file_path, shape_input) - print(f"fps: {fps}") - result["ov_fps_nncf_fx_quant_model"] = fps - - -def fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input): - with disable_patching(): - fp32_pt_model = copy.deepcopy(pt_model) - fp32_compile_model = torch.compile(fp32_pt_model, backend="openvino") - - quant_pt_model = quantize(fp32_compile_model, (example_input,), val_loader) - quant_compile_model = torch.compile(quant_pt_model, backend="openvino") - - g = FxGraphDrawer(quant_pt_model, f"b_pt_{pt_model.__class__.__name__}_int8") - g.get_dot_graph().write_svg(f"b_pt_{pt_model.__class__.__name__}_int8.svg") - - acc1_quant_model = validate(quant_compile_model, val_loader) - result["acc1_quant_model"] = acc1_quant_model - - latency_fx = measure_time(quant_compile_model, (example_input,)) - print(f"latency: {latency_fx}") - result["torch_compile_latency_fps_quant_model"] = latency_fx - - -def nncf_pt_2_ov_quantization(pt_model, val_loader, example_input, output_dir, result, shape_input): - def transform(x): - return x[0] - - nncf_model = nncf.quantize(copy.deepcopy(pt_model), nncf.Dataset(val_loader, transform_func=transform)) - - ov_nncf_model = ov.convert_model(nncf_model, example_input=example_input) - nncf_pt_file_path = output_dir / "nncf_pt.xml" - ov.save_model(ov_nncf_model, nncf_pt_file_path) - acc1_nncf_pt = validate_ov(ov_nncf_model, val_loader) - result["acc1_nncf_pt"] = acc1_nncf_pt - fps = run_benchmark(nncf_pt_file_path, shape_input) - print(f"fps: {fps}") - result["ov_fps_nncf_pt"] = fps - - -def nncf_ov_2_ov_quantization(ov_fp32_model, val_loader, output_dir, result, shape_input): - def transform(x): - return np.array(x[0]) - - from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters - from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters - - advanced_params = AdvancedQuantizationParameters() - # for sq_param in [-1, 0.15, 0.5, 0.75]: - for sq_param in [0.95]: - advanced_params.smooth_quant_alphas = AdvancedSmoothQuantParameters(matmul=sq_param) - - from copy import deepcopy - - fast_bias_correction = True - nncf_ov_int8_model = nncf.quantize( - deepcopy(ov_fp32_model), - nncf.Dataset(val_loader, transform_func=transform), - fast_bias_correction=fast_bias_correction, - model_type=ModelType.TRANSFORMER, - preset=QuantizationPreset.MIXED, - advanced_parameters=advanced_params, - ) - acc1_nncf_ov = validate_ov(nncf_ov_int8_model, val_loader) - result[f"acc1_nncf_ov_{sq_param}"] = acc1_nncf_ov - for precision, model in (("int8", nncf_ov_int8_model), ("fp32", ov_fp32_model)): - nncf_ov_file_path = output_dir / f"nncf_ov_{precision}.xml" - ov.save_model(model, nncf_ov_file_path) - fps = run_benchmark(nncf_ov_file_path, shape_input) - print(f"fps_{precision}: {fps} {sq_param}") - result[f"ov_fps_nncf_ov_{precision}_{sq_param}"] = fps - - latency = measure_time_ov(model, next(iter(val_loader))[0], num_iters=10_000) - print(f"latency_{precision}: {latency}") - result[f"ov_latency_nncf_ov_{precision}_{sq_param}"] = latency - - -def process_model(model_name: str): - - result = {"name": model_name} - model_cls, model_weights = 
-    output_dir = Path("models") / model_name
-    output_dir.mkdir(exist_ok=True)
-    ##############################################################
-    # Prepare dataset
-    ##############################################################
-
-    val_dataset = datasets.ImageFolder(root=DATASET_IMAGENET, transform=model_weights.transforms())
-    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)
-
-    ##############################################################
-    # Prepare original model
-    ##############################################################
-
-    pt_model = model_cls(weights=model_weights)
-    pt_model = pt_model.eval()
-    example_input = next(iter(val_loader))[0]
-    shape_input = list(example_input.shape)
-    ##############################################################
-    # Process FP32 Model
-    ##############################################################
-
-    fp32_pt_model = copy.deepcopy(pt_model)
-
-    orig_infer_acc1 = model_weights.meta.get("_metrics", {}).get("ImageNet-1K", {}).get("acc@1")
-    print(f"fp32 model metric: {orig_infer_acc1}")
-    # orig_infer_acc1 = validate(fp32_pt_model, val_loader)
-    result["acc1_fp32_openvino"] = orig_infer_acc1
-
-    fp32_pt_model = torch.export.export(fp32_pt_model, (example_input,))
-    ov_fp32_model = ov.convert_model(fp32_pt_model, example_input=example_input)
-    ov_fp32_file_path = None
-    ov_fp32_file_path = output_dir / "fp32.xml"
-    ov.save_model(ov_fp32_model, ov_fp32_file_path)
-    # result["fps_fp32_openvino"] = run_benchmark(ov_fp32_file_path, shape_input)
-    # print(f"fps_fp32_openvino {result['fps_fp32_openvino']}")
-
-    del fp32_pt_model
-    ##############################################################
-    # Process Torch AO Quantize with SQ
-    ##############################################################
-    # torch_ao_sq_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # with torch.no_grad():
-    #     exported_model = capture_pre_autograd_graph(pt_model, (example_input,))
-    #     latency_fx = measure_time(torch.compile(exported_model), (example_input,))
-    #     print(f"latency: {latency_fx}")
-    #############################################################
-
-    ##############################################################
-    # Process PT Quantize
-    ##############################################################
-    fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # Process NNCF FX Quantize
-    ##############################################################
-    # nncf_fx_2_ov_quantization(pt_model, example_input, output_dir, result, val_loader, shape_input)
-
-    ##############################################################
-    # Process NNCF Quantize by PT
-    ##############################################################
-    # nncf_pt_2_ov_quantization(pt_model, val_loader, example_input, output_dir, result, shape_input)
-
-    ##############################################################
-    # Process NNCF Quantize by OV
-    ##############################################################
-    # nncf_ov_2_ov_quantization(ov_fp32_model, val_loader, output_dir, result, shape_input)
-
-    print(result)
-    return result
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--model", help="torchvision model name", type=str, default="all")
-    parser.add_argument("--file_name", help="output csv file_name", type=str, default="result.csv")
type=str, default="result.csv") - - args = parser.parse_args() - - results_list = [] - if args.model == "all": - for model_name in MODELS_DICT: - print("---------------------------------------------------") - print(f"name: {model_name}") - results_list.append(process_model(model_name)) - else: - results_list.append(process_model(args.model)) - - df = pd.DataFrame(results_list) - print(df) - df.to_csv(args.file_name) - - -if __name__ == "__main__": - main() diff --git a/examples/llm_compression/openvino/tiny_llama/main.py b/examples/llm_compression/openvino/tiny_llama/main.py index e5f3893f1ab..f2be54ce1aa 100644 --- a/examples/llm_compression/openvino/tiny_llama/main.py +++ b/examples/llm_compression/openvino/tiny_llama/main.py @@ -11,12 +11,12 @@ import time from functools import partial +import datasets import numpy as np import openvino as ov from optimum.intel.openvino import OVModelForCausalLM from transformers import AutoTokenizer -import datasets import nncf diff --git a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py index e34b09bc2f9..b3fbce5722b 100644 --- a/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py +++ b/examples/llm_compression/openvino/tiny_llama_find_hyperparams/main.py @@ -17,12 +17,12 @@ import numpy as np import openvino as ov +from datasets import load_dataset from optimum.intel import OVModelForCausalLM from transformers import AutoTokenizer from whowhatbench import Evaluator import nncf -from datasets import load_dataset from nncf.common.logging import nncf_logger DataItem = TypeVar("DataItem") diff --git a/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/nncf/quantization/algorithms/min_max/torch_fx_backend.py index c5403386441..bdeed5343c8 100644 --- a/nncf/quantization/algorithms/min_max/torch_fx_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_fx_backend.py @@ -104,7 +104,7 @@ def group_conv_metatypes(self) -> List[OperatorMetatype]: @property def scaled_dot_product_attention_metatypes(self) -> List[OperatorMetatype]: - return [] + return [om.PTScaledDotProductAttentionMetatype] @property def scales_unification_map(self) -> Dict[OperatorMetatype, OperatorMetatype]: diff --git a/tests/torch/sparsity/movement/helpers/run_recipe.py b/tests/torch/sparsity/movement/helpers/run_recipe.py index 383552932d5..77b3140a967 100644 --- a/tests/torch/sparsity/movement/helpers/run_recipe.py +++ b/tests/torch/sparsity/movement/helpers/run_recipe.py @@ -20,6 +20,7 @@ import torch.nn import torch.nn.functional as F import torch.utils.data +from datasets import Dataset from transformers import AutoModelForAudioClassification from transformers import AutoModelForImageClassification from transformers import AutoModelForSequenceClassification @@ -33,7 +34,6 @@ from transformers import SwinConfig from transformers import Wav2Vec2Config -from datasets import Dataset from nncf import NNCFConfig from nncf.experimental.torch.sparsity.movement.scheduler import MovementSchedulerParams from nncf.torch.dynamic_graph.io_handling import FillerInputElement diff --git a/tests/torch/sparsity/movement/helpers/trainer.py b/tests/torch/sparsity/movement/helpers/trainer.py index 2af37c5b2f4..89ffeb6c865 100644 --- a/tests/torch/sparsity/movement/helpers/trainer.py +++ b/tests/torch/sparsity/movement/helpers/trainer.py @@ -14,6 +14,7 @@ import numpy as np import torch +from datasets import Dataset # pylint: disable=no-name-in-module from transformers import 
 from transformers.trainer import Trainer
 from transformers.trainer_callback import TrainerCallback
@@ -21,7 +22,6 @@
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-from datasets import Dataset  # pylint: disable=no-name-in-module
 from nncf.api.compression import CompressionAlgorithmController
 from nncf.common.compression import BaseCompressionAlgorithmController
 from nncf.common.utils.tensorboard import prepare_for_tensorboard
diff --git a/tests/torch/sparsity/movement/test_model_saving.py b/tests/torch/sparsity/movement/test_model_saving.py
index c7949afeb82..27a9655591a 100644
--- a/tests/torch/sparsity/movement/test_model_saving.py
+++ b/tests/torch/sparsity/movement/test_model_saving.py
@@ -18,6 +18,7 @@
 import pytest
 import torch
 from addict import Dict
+from datasets import Dataset
 from onnx import numpy_helper
 from openvino._offline_transformations import apply_fused_names_cleanup
 from openvino._offline_transformations import apply_moc_transformations
@@ -28,7 +29,6 @@
 from scipy.special import softmax
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-from datasets import Dataset
 from nncf.torch import create_compressed_model
 from nncf.torch.checkpoint_loading import load_state
 from tests.torch.helpers import PTTensorListComparator
diff --git a/tests/torch/sparsity/movement/training_scripts/run_glue.py b/tests/torch/sparsity/movement/training_scripts/run_glue.py
index d0f5b14269e..360832a5bb7 100644
--- a/tests/torch/sparsity/movement/training_scripts/run_glue.py
+++ b/tests/torch/sparsity/movement/training_scripts/run_glue.py
@@ -12,13 +12,12 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 
+import datasets
 import evaluate
 import jstyleson
 import numpy as np
 from transformers.training_args import ParallelMode
 
-import datasets
-
 # isort: off
 from nncf import NNCFConfig
 from nncf.api.compression import CompressionAlgorithmController
diff --git a/torch_compile_ex_release.py b/torch_compile_ex_release.py
deleted file mode 100644
index 7bd0addf02e..00000000000
--- a/torch_compile_ex_release.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Enable torch inductor freezing feature first
-import os
-
-os.environ["TORCHINDUCTOR_FREEZING"] = "1"
-
-
-import argparse
-import copy
-import time
-from collections import defaultdict
-
-import openvino.torch  # noqa
-import torch
-
-# Optional: using the C++ wrapper instead of default Python wrapper
-import torch._inductor.config as config
-import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
-import torchvision.models as models
-from torch._export import capture_pre_autograd_graph
-from torch.ao.quantization.quantize_pt2e import convert_pt2e
-from torch.ao.quantization.quantize_pt2e import prepare_pt2e
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
-from torch.fx.passes.graph_drawer import FxGraphDrawer
-
-from nncf.experimental.torch_fx.model_transformer import QPARAMPerChannel
-from nncf.experimental.torch_fx.model_transformer import QPARAMSPerTensor
-from nncf.experimental.torch_fx.model_transformer import insert_qdq_to_model
-from nncf.experimental.torch_fx.nncf_graph_builder import GraphConverter  # noqa
-
-
-def get_exported_model_from_nn_module(module, example_inputs):
-    with torch.no_grad():
-        return capture_pre_autograd_graph(module, example_inputs)
-
-
-NNCF_IMPL = True
-
-
-def get_qsetup(exported_model, example_inputs):
-    quantizer = X86InductorQuantizer()
-    quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
-
-    prepared_model = prepare_pt2e(exported_model, quantizer)
-    prepared_model(*example_inputs)
-    converted_model = convert_pt2e(prepared_model)
-    g = FxGraphDrawer(converted_model, "resnet18_int8")
-    g.get_dot_graph().write_svg("resnet18_int8_compiled.svg")
-    qsetup = defaultdict(lambda: dict())
-
-    for node in converted_model.graph.nodes:
-        if "dequantize" in node.name:
-            quantize = node.all_input_nodes[0]
-            # place = "activations"
-            # if len(quantize.all_input_nodes) > 1:
-            #     place = "weights"
-            if "per_tensor" in node.name:
-                params = QPARAMSPerTensor(*node.args[1:])
-            else:
-                params = []
-                for i in range(1, 3):
-                    name = node.args[i].target
-                    params.append(getattr(converted_model, name))
-                params = QPARAMPerChannel(*(params + list(node.args[3:])))
-
-            target_node_name = quantize.all_input_nodes[0].name
-            qsetup[target_node_name] = params
-    return qsetup
-
-
-def quantize(model, example_inputs):
-    if NNCF_IMPL:
-        # Use NNCF here on exported model
-        # to create a quantized model which is compatible with
-        # convert_pt2e function
-        pass
-        # 1. Convert torch.graph to NNCFGraph.
-        # # 2. Analize nncf grpah for SQ/CA
-        # # 3. Collect statistics
-        # # 4. Update params
-        # 5. Analize nncf graph for quantization
-        # 6. Insert observers
-        # 7. prepared_model(*example_inputs)
-        # 8. convert_pt2e(prepared_model)
-        import nncf
-
-        calibration_dataset = nncf.Dataset(example_inputs)
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        quantized_model = nncf.quantize(exported_model, calibration_dataset)
-        g = FxGraphDrawer(quantized_model, "resnet18_quantized_native_nncf")
-        g.get_dot_graph().write_svg("resnet18_quantized_native_nncf.svg")
-        return quantized_model
-
-    else:
-        # g = FxGraphDrawer(exported_model, "resnet18")
-        # g.get_dot_graph().write_svg("resnet18_compiled.svg")
-
-        # MOCK NNCF QUANTIZATION
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        qsetup = get_qsetup(exported_model, example_inputs)
-        exported_model = get_exported_model_from_nn_module(model, example_inputs)
-        exported_model = insert_qdq_to_model(exported_model, qsetup)
-        g = FxGraphDrawer(exported_model, "resnet18_int8")
-        g.get_dot_graph().write_svg("resnet18_int8_compiled_manually.svg")
-        return exported_model
-
-    return None  # converted_model
-
-
-config.cpp_wrapper = True
-
-
-def measure_time(model, example_inputs, num_iters):
-    with torch.no_grad():
-        model(*example_inputs)
-        total_time = 0
-        for i in range(0, num_iters):
-            start_time = time.time()
-            model(*example_inputs)
-            total_time += time.time() - start_time
-        average_time = (total_time / num_iters) * 1000
-    return average_time
-
-
-def get_dummy_dataset():
-    traced_bs = 1
-    x = torch.randn(traced_bs, 3, 224, 224).contiguous(memory_format=torch.channels_last)
-    example_inputs = (x,)
-    return example_inputs
-
-
-def main_nncf(model_name, num_iters):
-    model = models.__dict__[model_name](pretrained=True)
-    model = model.eval()
-
-    example_inputs = get_dummy_dataset()
-    import nncf
-
-    calibration_dataset = nncf.Dataset(example_inputs)
-    quantized_model = nncf.quantize(model, calibration_dataset)
-
-    import openvino as ov
-
-    ov_model = ov.convert_model(quantized_model.cpu(), example_input=example_inputs[0])
-    ov.serialize(ov_model, "./model_cache_nncf/model.xml")
-
-
-def main(model_name, num_iters):
-    model = models.__dict__[model_name](pretrained=True)
-    model = model.eval()
-
-    example_inputs = get_dummy_dataset()
-
-    converted_model = quantize(copy.deepcopy(model), example_inputs)
-
-    print("original model execution time: ", measure_time(model, example_inputs, num_iters))
-
-    native_optimized_model_fp32 = torch.compile(model)
-    print(
-        "Torch Inductor FP32 model execution time: ",
-        measure_time(native_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    native_optimized_model_int8 = torch.compile(converted_model)
-    print(
-        "Torch Inductor INT8 model execution time: ",
-        measure_time(native_optimized_model_int8, example_inputs, num_iters),
-    )
-
-    ov_optimized_model_fp32 = torch.compile(model, backend="openvino")
-    print(
-        "Torch.compile OpenVINO FP32 model execution time: ",
-        measure_time(ov_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    ov_optimized_model_int8 = torch.compile(
-        converted_model, backend="openvino", options={"model_caching": True, "cache_dir": "./model_cache"}
-    )
-    print(
-        "Torch.compile OpenVINO INT8 model execution time: ",
-        measure_time(ov_optimized_model_int8, example_inputs, num_iters),
-    )
-
-    import intel_extension_for_pytorch  # noqa
-
-    ipex_optimized_model_fp32 = torch.compile(model, backend="ipex")
-    print(
-        "Torch.compile IPEX FP32 model execution time: ",
-        measure_time(ipex_optimized_model_fp32, example_inputs, num_iters),
-    )
-
-    ipex_optimized_model_int8 = torch.compile(converted_model, backend="ipex")
-    print(
-        "Torch.compile IPEX INT8 model execution time: ",
-        measure_time(ipex_optimized_model_int8, example_inputs, num_iters),
-    )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--num_iters", help="number of inference iterations", type=int, default=100)
-    parser.add_argument("--model", help="torchvision model name", type=str, default="resnet18")
-    args = parser.parse_args()
-    model_name = args.model
-    num_iters = args.num_iters
-    main(model_name, num_iters)
-    # main_nncf(model_name, num_iters)
diff --git a/yolo_fx_bad_metrics_repro.py b/yolo_fx_bad_metrics_repro.py
deleted file mode 100644
index b5c05d6bbcb..00000000000
--- a/yolo_fx_bad_metrics_repro.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict, Tuple
-
-import numpy as np
-import torch
-from tqdm import tqdm
-from ultralytics.data.utils import check_det_dataset
-from ultralytics.engine.validator import BaseValidator as Validator
-from ultralytics.models.yolo import YOLO
-from ultralytics.utils.torch_utils import de_parallel
-
-
-def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) -> None:
-    mp, mr, map50, mean_ap = (
-        stats["metrics/precision(B)"],
-        stats["metrics/recall(B)"],
-        stats["metrics/mAP50(B)"],
-        stats["metrics/mAP50-95(B)"],
-    )
-    s = ("%20s" + "%12s" * 6) % ("Class", "Images", "Labels", "Precision", "Recall", "mAP@.5", "mAP@.5:.95")
-    print(s)
-    pf = "%20s" + "%12i" * 2 + "%12.3g" * 4  # print format
-    print(pf % ("all", total_images, total_objects, mp, mr, map50, mean_ap))
-
-
-def prepare_validation(model: YOLO, data: str) -> Tuple[Validator, torch.utils.data.DataLoader]:
-    # custom = {"rect": True, "batch": 1}  # method defaults
-    # rect: false forces to resize all input pictures to one size
-    custom = {"rect": False, "batch": 1}  # method defaults
-    args = {**model.overrides, **custom, "mode": "val"}  # highest priority args on the right
-
-    validator = model._smart_load("validator")(args=args, _callbacks=model.callbacks)
-    stride = 32  # default stride
-    validator.stride = stride  # used in get_dataloader() for padding
-    validator.data = check_det_dataset(data)
-    validator.init_metrics(de_parallel(model))
-
-    data_loader = validator.get_dataloader(validator.data.get(validator.args.split), validator.args.batch)
-    return validator, data_loader
-
-
-def validate(model, data_loader: torch.utils.data.DataLoader, validator: Validator) -> Tuple[Dict, int, int]:
-    with torch.no_grad():
-        for batch in data_loader:
-            batch = validator.preprocess(batch)
-            preds = model(batch["img"])
-            preds = validator.postprocess(preds)
-            validator.update_metrics(preds, batch)
-    stats = validator.get_stats()
-    return stats, validator.seen, validator.nt_per_class.sum()
-
-
-def main(torch_fx):
-    # ultralytics @ git+https://github.com/THU-MIG/yolov10.git@2c36ab0f108efdd17c7e290564bb845ccb6844d8
-    # pip install git+https://github.com/THU-MIG/yolov10.git
-    # pip install huggingface-hub
-    # yolo_model = YOLO("yolov10n.pt")
YOLO("yolov10n.pt") - - yolo_model = YOLO("yolov8n") - - model_type = "torch" - model = yolo_model.model - if torch_fx: - model = torch.compile(model) - model_type = "FX" - print(f"FP32 {model_type} model validation results:") - validator, data_loader = prepare_validation(yolo_model, "coco128.yaml") - stats, total_images, total_objects = validate(model, tqdm(data_loader), validator) - print_statistics(stats, total_images, total_objects) - - -if __name__ == "__main__": - print("Torch model:") - main(torch_fx=False) - print("Torch FX model:") - main(torch_fx=True)