Support for freezing pretrained vision model layers with regex #3981

Merged on Jun 1, 2024 (38 commits)
Commits
92366d5
added regex support for freezing specific layers
ethanreidel Mar 22, 2024
cbe1b67
fixed changes to trainer yaml config
ethanreidel Mar 22, 2024
5df7362
regen static schema
ethanreidel Mar 22, 2024
b7985f6
added trainer schema changes
ethanreidel Mar 26, 2024
3a4507d
fixed var names
ethanreidel Mar 26, 2024
9fe5df6
added unit test, cleaned up trainer code, added function in trainer_u…
ethanreidel Mar 27, 2024
82167ce
added training test
ethanreidel Mar 28, 2024
3b8bbd3
cleaned up tests
ethanreidel Mar 28, 2024
a7683de
misc comments/var name changes
ethanreidel Mar 28, 2024
5418a6a
updated description of layers_to_freeze_regex parameter
ethanreidel Mar 28, 2024
aeb2121
Revert "regen static schema"
ethanreidel Mar 28, 2024
1600d19
fixed typo
ethanreidel Mar 28, 2024
58d18ef
well another typo fix
ethanreidel Mar 28, 2024
f4e9cb4
initial summary CLI addition
ethanreidel Apr 2, 2024
192119b
removed try statement
ethanreidel Apr 2, 2024
6649433
added test and model list function
ethanreidel Apr 2, 2024
5ff7e7c
use pretrained off
ethanreidel Apr 2, 2024
ad77764
use_pretrained false for test
ethanreidel Apr 2, 2024
cc94eb4
added more thorough checking for valid regex
ethanreidel Apr 22, 2024
6f36aba
fixed train test, cleaned up pretrained summary CLI
ethanreidel Apr 22, 2024
31511fe
various nits fixed
ethanreidel Apr 25, 2024
a900e48
nit fixes
ethanreidel Apr 25, 2024
02bb963
Merge branch 'develop' of https://github.com/ethanreidel/ludwig into …
ethanreidel Apr 25, 2024
a375296
reverted changes to trainer utils
ethanreidel Apr 25, 2024
23a8b3c
updated collect summary + cli changes
ethanreidel May 16, 2024
1c0f173
post init changes + trainer cleanup
ethanreidel May 16, 2024
f96b5b3
updated unit test for LLM freezing
ethanreidel May 17, 2024
5546c83
two examples and various fixes
ethanreidel May 18, 2024
7483111
small fix
ethanreidel May 18, 2024
3210c56
fix
ethanreidel May 18, 2024
66fb3de
spaces fix
ethanreidel May 18, 2024
f8a46ca
added instructions for new functionality
ethanreidel May 18, 2024
8fff02e
quick fixes
ethanreidel May 19, 2024
d2e0690
small llm test changes
ethanreidel May 19, 2024
1feb853
added padding token for IT
ethanreidel May 19, 2024
0c5b762
cleaned up llm+ecd tests
ethanreidel May 23, 2024
4e62b92
rmtree files examples, fixed llm freezing unittest
ethanreidel May 23, 2024
e85d16d
remove files at end of example
ethanreidel May 23, 2024
6 changes: 6 additions & 0 deletions ludwig/cli.py
@@ -58,6 +58,7 @@ def __init__(self):
render_config Renders the fully populated config with all defaults set
check_install Runs a quick training run on synthetic data to verify installation status
upload Push trained model artifacts to a registry (e.g., Predibase, HuggingFace Hub)
pretrained_summary Displays a summary of a pretrained model (e.g., alexnet, efficientnet)
""",
)
parser.add_argument("command", help="Subcommand to run")
@@ -191,6 +192,11 @@ def upload(self):

upload.cli(sys.argv[2:])

def pretrained_summary(self):
from ludwig.utils import pretrained_summary

pretrained_summary.cli_summarize_pretrained(sys.argv[2:])

Collaborator:

It might be good to add some example runs and outputs in the docs.

Collaborator:

@ethanreidel To second @skanjila -- would it be possible to show an example of running this command in terms of how it is different from the existing one -- and an example output. Thank you very much.

Contributor Author:

Certainly.

Contributor Author:

Qq: when you say you'd like an example, do you mean an example in the Ludwig docs or how would you prefer it?

Contributor:

@ethanreidel One option is to create an example in the examples/ top level directory in Ludwig
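
For reference, a hedged illustration of what such a run could look like with the subcommand added in this PR (output abbreviated, Ludwig banner omitted; the exact parameter names depend on the installed torchvision version):

ludwig pretrained_summary --model_name alexnet
features.0.weight
features.0.bias
features.3.weight
features.3.bias
...
classifier.4.weight
classifier.4.bias
classifier.6.weight
classifier.6.bias

Since freeze_layers_regex applies re.search to each parameter name, the printed names are the substrings that a layers_to_freeze_regex pattern can match against.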


def main():
ludwig.contrib.preload(sys.argv)
9 changes: 9 additions & 0 deletions ludwig/schema/metadata/configs/trainer.yaml
@@ -70,6 +70,15 @@ ecd:
In many large-scale training runs, evaluation is often configured to run on
a sub-epoch time scale, or every few thousand steps.
ui_display_name: Checkpoints per epoch
layers_to_freeze_regex:
default_value_reasoning:
By default, no layers will be frozen when fine-tuning a pretrained model.
description_implications:
Freezing specific layers can improve a pretrained model's performance in a number
of ways. At a basic level, freezing early layers can prevent overfitting by retaining
more general features (beneficial for small datasets). It can also reduce computational
resource use and lower overall training time because fewer gradients are computed.
expected_impact: 1
early_stop:
default_value_reasoning:
Deep learning models are prone to overfitting. It's generally
11 changes: 11 additions & 0 deletions ludwig/schema/trainer.py
@@ -86,6 +86,17 @@ class BaseTrainerConfig(schema_utils.BaseMarshmallowConfig, ABC):
),
)

layers_to_freeze_regex: str = schema_utils.String(
default=None,
allow_none=True,
description=(
"Freeze specific layers based on the provided regex. Freezing specific layers can improve a "
"pretrained model's performance in a number of ways. At a basic level, freezing early layers can "
"prevent overfitting by retaining more general features (beneficial for small datasets). It can also "
"reduce computational resource use and lower overall training time because fewer gradients are computed."
),
)

early_stop: int = schema_utils.IntegerRange(
default=5,
min=-1,
6 changes: 6 additions & 0 deletions ludwig/trainers/trainer.py
@@ -68,6 +68,7 @@
from ludwig.utils.torch_utils import get_torch_device
from ludwig.utils.trainer_utils import (
append_metrics,
freeze_layers_regex,
get_final_steps_per_checkpoint,
get_latest_metrics_dict,
get_new_progress_tracker,
@@ -154,6 +155,7 @@ def __init__(
self._validation_field = config.validation_field
self._validation_metric = config.validation_metric
self.early_stop = config.early_stop
self.layers_to_freeze_regex = config.layers_to_freeze_regex
self.steps_per_checkpoint = config.steps_per_checkpoint
self.checkpoints_per_epoch = config.checkpoints_per_epoch
self.evaluate_training_set = config.evaluate_training_set
@@ -225,6 +227,10 @@ def prepare(self):
base_learning_rate *= lr_scale_fn(self.distributed.size())
self.base_learning_rate = base_learning_rate

# If a regex is supplied, freeze the matching layers
if self.config.layers_to_freeze_regex:
freeze_layers_regex(self.config, self.model)

# We may need to replace the embedding layer when using 8-bit optimizers from bitsandbytes.
update_embedding_layer(self.compiled_model, self.config)

152 changes: 152 additions & 0 deletions ludwig/utils/pretrained_summary.py
@@ -0,0 +1,152 @@
#! /usr/bin/env python
# Copyright (c) 2024 Predibase, Inc., 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import argparse
import importlib

from ludwig.api_annotations import DeveloperAPI
from ludwig.contrib import add_contrib_callback_args
from ludwig.globals import LUDWIG_VERSION
from ludwig.utils.print_utils import print_ludwig

Collaborator:

wait are we really supporting all of these models, I thought we were just going to go out the door with a couple of models to start?

Contributor Author:

For this specific feature (simple regex freezing), as long as you have access to the string representation of layers + actual model architecture, you can freeze any layers that you'd like. It wasn't any extra work adding support for all torchvision models besides adding to this list. I however don't like the look of this long model array though

Collaborator:

@ethanreidel While it looks like for torchvision this will be supported, what about text/LLMs (this is kind of related to my previous comment in the Trainers section). Thanks!

Collaborator:

> For this specific feature (simple regex freezing), as long as you have access to the string representation of layers + actual model architecture, you can freeze any layers that you'd like. It wasn't any extra work adding support for all torchvision models besides adding to this list. I however don't like the look of this long model array though

@ethanreidel Sorry, could you please point me to this "long model array"? Which line in your code has it? Thanks!

Contributor Author:

For your first question, Alex: as long as access to the model layers + their requires_grad parameter is available, in theory this feature should work on LLMs/text. I'm not too familiar with LLM architecture and I'll have to do some quick checks, but I'm 99% sure it is an easy addition. Second question: in a previous commit, I had a pretty hacky solution where users had another command line option (under pretrained_summary) which would list all available model names. Those names were stored in a Python list, which had a few issues, namely having to expand it regularly and many lines of unnecessary code. Saad made a good point and said to fully remove it (it was not needed), so it's no longer there.

Contributor Author:

Just checked and sure enough you can apply the same regex freezing technique to an LLM

Contributor:

> Just checked and sure enough you can apply the same regex freezing technique to an LLM

@ethanreidel That's awesome. Maybe we can then use one of my earlier comments to only add this parameter to the ECDTrainerConfig and FineTuneTrainerConfig for now.

As part of the examples you have, it would be good to create 2 example Python files:

  1. To show how to use it with a computer vision model
  2. To show how to use it with an LLM base model

What do you think?
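
As a rough sketch of what the first of those example files might look like (the feature names, dataset path, and exact regex below are illustrative assumptions, not code from this PR):

from ludwig.api import LudwigModel

# Hedged sketch: fine-tune a torchvision EfficientNet encoder while freezing its
# first two feature blocks via the new layers_to_freeze_regex trainer option.
config = {
    "input_features": [
        {
            "name": "image_path",  # illustrative column name
            "type": "image",
            "encoder": {"type": "efficientnet", "use_pretrained": True},
        }
    ],
    "output_features": [{"name": "label", "type": "category"}],  # illustrative
    "trainer": {
        # Layer names can be inspected with `ludwig pretrained_summary -m efficientnet_b0`.
        "layers_to_freeze_regex": r"(features\.1\..*|features\.2\..*)",
        "epochs": 1,
    },
}

model = LudwigModel(config)
# model.train(dataset="train.csv")  # dataset path is a placeholder

The LLM counterpart would follow the same pattern, with the regex written against the base model's parameter names (for example, matching specific transformer blocks).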

models = [
"alexnet",
"convnext",
"convnext_base",
"convnext_large",
"convnext_small",
"convnext_tiny",
"densenet",
"densenet121",
"densenet161",
"densenet169",
"densenet201",
"efficientnet",
"efficientnet_b0",
"efficientnet_b1",
"efficientnet_b2",
"efficientnet_b3",
"efficientnet_b4",
"efficientnet_b5",
"efficientnet_b6",
"efficientnet_b7",
"efficientnet_v2_l",
"efficientnet_v2_m",
"efficientnet_v2_s",
"googlenet",
"inception",
"inception_v3",
"maxvit",
"maxvit_t",
"mnasnet",
"mnasnet0_5",
"mnasnet0_75",
"mnasnet1_0",
"mnasnet1_3",
"mobilenet",
"mobilenet_v2",
"mobilenet_v3_large",
"mobilenet_v3_small",
"mobilenetv2",
"mobilenetv3",
"regnet",
"regnet_x_16gf",
"regnet_x_1_6gf",
"regnet_x_32gf",
"regnet_x_3_2gf",
"regnet_x_400mf",
"regnet_x_800mf",
"regnet_x_8gf",
"regnet_y_128gf",
"regnet_y_16gf",
"regnet_y_1_6gf",
"regnet_y_32gf",
"regnet_y_3_2gf",
"regnet_y_400mf",
"regnet_y_800mf",
"regnet_y_8gf",
"resnet",
"resnet101",
"resnet152",
"resnet18",
"resnet34",
"resnet50",
"resnext101_32x8d",
"resnext101_64x4d",
"resnext50_32x4d",
"shufflenet_v2_x0_5",
"shufflenet_v2_x1_0",
"shufflenet_v2_x1_5",
"shufflenet_v2_x2_0",
"shufflenetv2",
"squeezenet",
"squeezenet1_0",
"squeezenet1_1",
"swin_transformer",
"vgg",
"vgg11",
"vgg11_bn",
"vgg13",
"vgg13_bn",
"vgg16",
"vgg16_bn",
"vgg19",
"vgg19_bn",
"vit_b_16",
"vit_b_32",
"vit_h_14",
"vit_l_16",
"vit_l_32",
"wide_resnet101_2",
"wide_resnet50_2",
]


def pretrained_summary(model_name, **kwargs) -> None:
if model_name in models:
module = importlib.import_module("torchvision.models")
encoder_class = getattr(module, model_name)
model = encoder_class()

for name, _ in model.named_parameters():
print(name)
else:
print(f"No encoder found for '{model_name}'")


@DeveloperAPI
def cli_summarize_pretrained(sys_argv):
parser = argparse.ArgumentParser(
description="This script displays a summary of a pretrained model for freezing purposes.",
prog="ludwig pretrained_summary",
usage="%(prog)s [options]",
)
parser.add_argument("-m", "--model_name", help="output model layers", required=False, type=str)
parser.add_argument("-l", "--list_models", action="store_true", help="print available models")

add_contrib_callback_args(parser)
args = parser.parse_args(sys_argv)

args.callbacks = args.callbacks or []
for callback in args.callbacks:
callback.on_cmdline("pretrained_summary", *sys_argv)

print_ludwig("Model Summary", LUDWIG_VERSION)
if args.list_models:
print("Available models:")
for model in models:
print(f"- {model}")
else:
pretrained_summary(**vars(args))
15 changes: 15 additions & 0 deletions ludwig/utils/trainer_utils.py
@@ -1,4 +1,5 @@
import logging
import re
from collections import defaultdict
from typing import Dict, List, Tuple, TYPE_CHECKING

@@ -10,6 +11,7 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import AUTO, COMBINED, LOSS
from ludwig.models.base import BaseModel
from ludwig.models.ecd import ECD
from ludwig.modules.metric_modules import get_best_function
from ludwig.utils.data_utils import save_json
from ludwig.utils.metric_utils import TrainerMetric
@@ -408,3 +410,16 @@ def get_rendered_batch_size_grad_accum(config: "BaseTrainerConfig", num_workers:
gradient_accumulation_steps = 1

return batch_size, gradient_accumulation_steps


def freeze_layers_regex(config: "BaseTrainerConfig", model: ECD) -> None:
"""Freezes layers based on provided regular expression."""
Collaborator:

Let's add all of the comments around inputs/outputs as well.

Collaborator:

@ethanreidel I think that if you put from __future__ import annotations as the very first line in the module, you would not need to quote the types. Would you like to give it a try and see if it works? Thanks!

Contributor Author:

I tested the annotations import, and it worked, but the git pre-commit was forcing changes (e.g. converting all uppercase Dicts to lowercase dicts) that I didn't like.

Contributor:

@ethanreidel I think that makes sense.

Are you able to expand on the docstring itself for this function? Also, if it also supports LLM, can we make model a union of ECD and LLM?

try:
pattern = re.compile(config.layers_to_freeze_regex)
Collaborator:

@ethanreidel Would you be interested if I gave you a reasonably well-featured RegEx utility so that you can just put it into the utils and use it -- it will save a lot of boilerplate like this. Please let me know any time. Thanks!

Contributor Author:

Yeah that sounds good. Thanks

except re.error:
logger.warning("Invalid regex input.\n")
exit()
Contributor:

Instead of exit(), let's raise a RuntimeError() with the same message.

In fact, here's a thought I have: We can move this check to earlier in the code path, that is, at config validation time. Specifically, you can create a __post_init__() hook for ECDTrainerConfig and FineTuneTrainerConfig that tries to do re.compile() and if it fails, throws a ConfigValidationError with the error message. That way, we don't have to wait for all of preprocessing etc to be done before catching this error.

Here's an example explaining the same idea in a different part of the Ludwig codepath: https://github.com/ludwig-ai/ludwig/blob/master/ludwig/schema/llms/peft.py#L443


for name, p in model.named_parameters():
if re.search(pattern, str(name)):
p.requires_grad = False
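
A hedged sketch of the __post_init__ validation suggested in the review comment above (the ConfigValidationError import path is assumed; the code actually merged may differ):

import re

from ludwig.error import ConfigValidationError


# Inside ECDTrainerConfig / FineTuneTrainerConfig (sketch only):
def __post_init__(self):
    # Fail fast at config validation time instead of after preprocessing.
    if self.layers_to_freeze_regex is not None:
        try:
            re.compile(self.layers_to_freeze_regex)
        except re.error as e:
            raise ConfigValidationError(
                f"layers_to_freeze_regex '{self.layers_to_freeze_regex}' "
                f"is not a valid regular expression: {e}"
            )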
18 changes: 18 additions & 0 deletions tests/integration_tests/test_cli.py
@@ -268,6 +268,24 @@ def test_collect_summary_activations_weights_cli(tmpdir, csv_filename):
assert _run_ludwig("collect_summary", model=os.path.join(tmpdir, "experiment_run", "model"))


@pytest.mark.parametrize(
"model_name",
[
"alexnet",
"convnext_base",
"convnext_large",
"convnext_small",
"convnext_tiny",
"densenet121",
"densenet161",
"densenet169",
],
)
def test_pretrained_summary_cli(model_name: str):
"""Test pretrained_summary cli."""
_run_ludwig("pretrained_summary", model_name=model_name)


def test_synthesize_dataset_cli(tmpdir, csv_filename):
"""Test synthesize_data cli."""
# test depends on default setting of --dataset_size
70 changes: 70 additions & 0 deletions tests/ludwig/modules/test_regex_freezing.py
@@ -0,0 +1,70 @@
import re
from contextlib import nullcontext as no_error_raised

import pytest

from ludwig.api import LudwigModel
from ludwig.constants import TRAINER
from ludwig.encoders.image.torchvision import TVEfficientNetEncoder
from ludwig.schema.trainer import BaseTrainerConfig
from ludwig.utils.misc_utils import set_random_seed
from ludwig.utils.trainer_utils import freeze_layers_regex
from tests.integration_tests.utils import category_feature, generate_data, image_feature

RANDOM_SEED = 130


@pytest.mark.parametrize(
"regex",
[
r"(features\.1.*|features\.2.*|features\.3.*|model\.features\.4\.1\.block\.3\.0\.weight)",
r"(features\.1.*|features\.2\.*|features\.3.*)",
r"(features\.4\.0\.block|features\.4\.\d+\.block)",
r"(features\.5\.*|features\.6\.*|features\.7\.*)",
r"(features\.8\.\d+\.weight|features\.8\.\d+\.bias)",
],
)
def test_tv_efficientnet_freezing(regex):
set_random_seed(RANDOM_SEED)

pretrained_model = TVEfficientNetEncoder(
model_variant="b0", use_pretrained=False, saved_weights_in_checkpoint=True, trainable=True
)

config = BaseTrainerConfig(layers_to_freeze_regex=regex)
freeze_layers_regex(config, pretrained_model)
for name, param in pretrained_model.named_parameters():
if re.search(re.compile(regex), name):
assert not param.requires_grad
else:
assert param.requires_grad


def test_frozen_tv_training(tmpdir, csv_filename):
input_features = [image_feature(tmpdir)]
output_features = [category_feature()]

config = {
"input_features": input_features,
"output_features": output_features,
TRAINER: {
"layers_to_freeze_regex": r"(features\.1.*|features\.2.*|model\.features\.4\.1\.block\.3\.0\.weight)",
"epochs": 1,
"train_steps": 1,
},
"encoder": {"type": "efficientnet", "use_pretrained": False},
}

training_data_csv_path = generate_data(config["input_features"], config["output_features"], csv_filename)
model = LudwigModel(config)

with no_error_raised():
model.experiment(
dataset=training_data_csv_path,
skip_save_training_description=True,
skip_save_training_statistics=True,
skip_save_model=True,
skip_save_progress=True,
skip_save_log=True,
skip_save_processed_input=True,
)