Release 1.5.1 (#504)
ashleve committed Jan 14, 2023
2 parents 37b57ce + b4166b3 commit 24b937c
Showing 24 changed files with 117 additions and 194 deletions.
2 changes: 1 addition & 1 deletion .github/release-drafter.yml
@@ -23,7 +23,7 @@ categories:
- "documentation"
- "docs"

change-template: "- $TITLE (#$NUMBER)"
change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions

version-resolver:
2 changes: 1 addition & 1 deletion .github/workflows/code-quality-pr.yaml
@@ -8,7 +8,7 @@ name: Code Quality PR

on:
pull_request:
branches: [main, "release/*"]
branches: [main, "release/*", "dev"]

jobs:
code-quality:
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -4,7 +4,7 @@ on:
push:
branches: [main]
pull_request:
branches: [main, "release/*"]
branches: [main, "release/*", "dev"]

jobs:
run_tests_ubuntu:
5 changes: 2 additions & 3 deletions .gitignore
@@ -146,7 +146,6 @@ dmypy.json

# Lightning-Hydra-Template
configs/local/default.yaml
data/
logs/
/data/
/logs/
.env
.autoenv
2 changes: 2 additions & 0 deletions .project-root
@@ -0,0 +1,2 @@
# this file is required for inferring the project root directory
# do not delete
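The template's entry points locate this marker file with the `pyrootutils` package and export the result as `PROJECT_ROOT` (see `configs/paths/default.yaml` further down). A minimal sketch of that lookup — the exact call used in the repo may differ:

```python
# illustrative sketch of project-root inference; not the template's exact code
import pyrootutils

# walk up from this file until a directory containing ".project-root" is found,
# export it as the PROJECT_ROOT env var and add it to sys.path
root = pyrootutils.setup_root(
    search_from=__file__,
    indicator=".project-root",
    project_root_env_var=True,  # makes ${oc.env:PROJECT_ROOT} resolvable in Hydra configs
    pythonpath=True,
)
```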
115 changes: 43 additions & 72 deletions README.md
@@ -3,9 +3,9 @@
# Lightning-Hydra-Template

[![python](https://img.shields.io/badge/-Python_3.7_%7C_3.8_%7C_3.9_%7C_3.10-blue?logo=python&logoColor=white)](https://github.com/pre-commit/pre-commit)
[![pytorch](https://img.shields.io/badge/PyTorch_1.8+-ee4c2c?logo=pytorch&logoColor=white)](https://pytorch.org/get-started/locally/)
[![lightning](https://img.shields.io/badge/-Lightning_1.6+-792ee5?logo=pytorchlightning&logoColor=white)](https://pytorchlightning.ai/)
[![hydra](https://img.shields.io/badge/Config-Hydra_1.2-89b8cd)](https://hydra.cc/)
[![pytorch](https://img.shields.io/badge/PyTorch_1.10+-ee4c2c?logo=pytorch&logoColor=white)](https://pytorch.org/get-started/locally/)
[![lightning](https://img.shields.io/badge/-Lightning_1.8+-792ee5?logo=pytorchlightning&logoColor=white)](https://pytorchlightning.ai/)
[![hydra](https://img.shields.io/badge/Config-Hydra_1.3-89b8cd)](https://hydra.cc/)
[![black](https://img.shields.io/badge/Code%20Style-Black-black.svg?labelColor=gray)](https://black.readthedocs.io/en/stable/)
[![pre-commit](https://img.shields.io/badge/Pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
[![tests](https://github.com/ashleve/lightning-hydra-template/actions/workflows/test.yml/badge.svg)](https://github.com/ashleve/lightning-hydra-template/actions/workflows/test.yml)
@@ -14,14 +14,7 @@
[![license](https://img.shields.io/badge/License-MIT-green.svg?labelColor=gray)](https://github.com/ashleve/lightning-hydra-template#license)
[![contributors](https://img.shields.io/github/contributors/ashleve/lightning-hydra-template.svg)](https://github.com/ashleve/lightning-hydra-template/graphs/contributors)

<!-- <a href="https://www.python.org/"><img alt="Python" src="https://img.shields.io/badge/-Python 3.7+-blue?style=for-the-badge&logo=python&logoColor=white"></a> -->

<!-- <a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/-PyTorch 1.8+-ee4c2c?style=for-the-badge&logo=pytorch&logoColor=white"></a>
<a href="https://pytorchlightning.ai/"><img alt="Lightning" src="https://img.shields.io/badge/-Lightning 1.6+-792ee5?style=for-the-badge&logo=pytorchlightning&logoColor=white"></a>
<a href="https://hydra.cc/"><img alt="Config: hydra" src="https://img.shields.io/badge/config-hydra 1.2-89b8cd?style=for-the-badge&labelColor=gray"></a>
<a href="https://black.readthedocs.io/en/stable/"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-black.svg?style=for-the-badge&labelColor=gray"></a> -->

A clean and scalable template to kickstart your deep learning project 🚀⚡🔥<br>
A clean template to kickstart your deep learning project 🚀⚡🔥<br>
Click on [<kbd>Use this template</kbd>](https://github.com/ashleve/lightning-hydra-template/generate) to initialize a new repository.

_Suggestions are always welcome!_
@@ -32,47 +32,22 @@ _Suggestions are always welcome!_

## 📌  Introduction

**Why you should use it:**
**Why you might want to use it:**

- Convenient all-in-one technology stack for deep learning prototyping - allows you to rapidly iterate over new models, datasets and tasks on different hardware accelerators like CPUs, multi-GPUs or TPUs.
- A collection of best practices for efficient workflow and reproducibility.
- Thoroughly commented - you can use this repo as a reference and educational resource.
- Convenient technology stack for deep learning prototyping - allows you to rapidly iterate over new models, datasets and tasks on different hardware accelerators like CPUs, multi-GPUs or TPUs.
- Thoroughly commented - you can use this repo as an educational resource.
- A collection of useful tools, configs, and code snippets - you can use this repo as a reference for various utilities, e.g. Makefile, pre-commit hooks or smoke tests.

**Why you shouldn't use it:**
**Why you might not want to use it:**

- Lightning and Hydra are still evolving and integrate many libraries, which means sometimes things break - for the list of currently known problems visit [this page](https://github.com/ashleve/lightning-hydra-template/labels/bug).
- Template is not really adjusted for data science and building data pipelines that depend on each other (it's much more useful for model prototyping on ready-to-use data).
- The configuration setup is built with simple lightning training in mind (you might need to put some effort to adjust it for different use cases, e.g. lightning lite).
- Limits you as much as pytorch lightning limits you.
- Template is not really adjusted for data science and building data pipelines that depend on each other - it's much more efficient to use it for model prototyping on ready-to-use data.
- The configuration setup is built with simple lightning training in mind - you might need to put some effort to adjust it for different use cases, e.g. lightning lite.

_\*keep in mind this is an unofficial community project_

<br>

<!-- ## Table Of Contents
- [📌&nbsp;&nbsp;Introduction](#introduction)
- [Main Technologies](#main-technologies)
- [Main Ideas Of This Template](#main-ideas-of-this-template)
- [Project Structure](#project-structure)
- [🚀&nbsp;&nbsp;Quickstart](#quickstart)
- [⚡&nbsp;&nbsp;Your Superpowers](#your-superpowers)
- [❤️&nbsp;&nbsp;Contributions](#️contributions)
- [How It Works](#how-it-works)
- [Main Config](#main-config)
- [Experiment Config](#experiment-config)
- [Workflow](#workflow)
- [Logs](#logs)
- [Experiment Tracking](#experiment-tracking)
- [Tests](#tests)
- [Hyperparameter Search](#hyperparameter-search)
- [Continuous Integration](#continuous-integration)
- [Distributed Training](#distributed-training)
- [Best Practices](#best-practices)
- [Resources](#resources)
<br> -->

## Main Technologies

[PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning) - a lightweight PyTorch wrapper for high-performance AI research. Think of it as a framework for organizing your PyTorch code.
@@ -81,20 +81,20 @@ _\*keep in mind this is unofficial community project_

<br>

## Main Ideas Of This Template

- **Predefined Structure**: clean and scalable so that work can easily be extended [# Project Structure](#project-structure)
- **Rapid Experimentation**: thanks to hydra command line superpowers | [# Your Superpowers](#your-superpowers)
- **Little Boilerplate**: thanks to automating pipelines with config instantiation | [# How It Works](#how-it-works)
- **Main Configs**: specify default training configuration | [# Main Config](#main-config)
- **Experiment Configs**: override chosen hyperparameters | [# Experiment Config](#experiment-config)
- **Workflow**: comes down to 4 simple steps | [# Workflow](#workflow)
- **Experiment Tracking**: Tensorboard, W&B, Neptune, Comet, MLFlow and CSVLogger | [# Experiment Tracking](#experiment-tracking)
- **Logs**: all logs (checkpoints, configs, etc.) are stored in a dynamically generated folder structure | [# Logs](#logs)
- **Hyperparameter Search**: made easier with Hydra plugins like Optuna Sweeper | [# Hyperparameter Search](#hyperparameter-search)
- **Tests**: generic, easy-to-adapt tests for speeding up the development | [# Tests](#tests)
- **Continuous Integration**: automatically test your repo with Github Actions | [# Continuous Integration](#continuous-integration)
- **Best Practices**: a couple of recommended tools, practices and standards | [# Best Practices](#best-practices)
## Main Ideas

- [**Predefined Structure**](#project-structure): clean and scalable so that work can easily be extended
- [**Rapid Experimentation**](#your-superpowers): thanks to hydra command line superpowers
- [**Little Boilerplate**](#how-it-works): thanks to automating pipelines with config instantiation
- [**Main Configs**](#main-config): allow to specify default training configuration
- [**Experiment Configs**](#experiment-config): allow to override chosen hyperparameters
- [**Workflow**](#workflow): comes down to 4 simple steps
- [**Experiment Tracking**](#experiment-tracking): Tensorboard, W&B, Neptune, Comet, MLFlow and CSVLogger
- [**Logs**](#logs): all logs (checkpoints, configs, etc.) are stored in a dynamically generated folder structure
- [**Hyperparameter Search**](#hyperparameter-search): made easier with Hydra plugins like Optuna Sweeper
- [**Tests**](#tests): generic, easy-to-adapt tests for speeding up the development
- [**Continuous Integration**](#continuous-integration): automatically test your repo with Github Actions
- [**Best Practices**](#best-practices): a couple of recommended tools, practices and standards

<br>

@@ -105,7 +73,7 @@ The directory structure of new project looks like this:
```
├── configs <- Hydra configuration files
│ ├── callbacks <- Callbacks configs
│ ├── datamodule <- Datamodule configs
│ ├── data <- Data configs
│ ├── debug <- Debugging configs
│ ├── experiment <- Experiment configs
│ ├── extras <- Extra utilities configs
@@ -131,7 +99,7 @@ The directory structure of new project looks like this:
├── scripts <- Shell scripts
├── src <- Source code
│ ├── datamodules <- Lightning datamodules
│ ├── data <- Lightning datamodules
│ ├── models <- Lightning models
│ ├── utils <- Utility scripts
│ │
@@ -379,7 +347,7 @@ python eval.py ckpt_path="/path/to/ckpt/name.ckpt"
```bash
# this will run 6 experiments one after the other,
# each with different combination of batch_size and learning rate
python train.py -m datamodule.batch_size=32,64,128 model.lr=0.001,0.0005
python train.py -m data.batch_size=32,64,128 model.lr=0.001,0.0005
```

> **Note**: Hydra composes configs lazily at job launch time. If you change code or configs after launching a job/sweep, the final composed configs might be impacted.
@@ -504,6 +472,10 @@ ValueError: Specify tags before launching a multirun!

## ❤️  Contributions

This project exists thanks to all the people who contribute.

![Contributors](https://readme-contributors.now.sh/ashleve/lightning-hydra-template?extension=jpg&width=400&aspectRatio=1)

Have a question? Found a bug? Missing a specific feature? Feel free to file a new issue, discussion or PR with respective title and description.

Before making an issue, please verify that:
@@ -562,7 +534,7 @@ It determines how config is composed when simply executing command `python train.py`
# order of defaults determines the order in which configs override each other
defaults:
- _self_
- datamodule: mnist.yaml
- data: mnist.yaml
- model: mnist.yaml
- callbacks: default.yaml
- logger: null # set logger here or use command line (e.g. `python train.py logger=csv`)
@@ -629,7 +601,7 @@ For example, you can use them to version control best hyperparameters for each c
# python train.py experiment=example

defaults:
- override /datamodule: mnist.yaml
- override /data: mnist.yaml
- override /model: mnist.yaml
- override /callbacks: default.yaml
- override /trainer: default.yaml
@@ -654,7 +626,7 @@ model:
lin2_size: 256
lin3_size: 64

datamodule:
data:
batch_size: 64

logger:
@@ -672,7 +644,7 @@ logger:
**Basic workflow**

1. Write your PyTorch Lightning module (see [models/mnist_module.py](src/models/mnist_module.py) for example)
2. Write your PyTorch Lightning datamodule (see [datamodules/mnist_datamodule.py](src/datamodules/mnist_datamodule.py) for example)
2. Write your PyTorch Lightning datamodule (see [data/mnist_datamodule.py](src/data/mnist_datamodule.py) for example)
3. Write your experiment config, containing paths to model and datamodule
4. Run training with chosen experiment config:
```bash
@@ -686,7 +658,7 @@ _Say you want to execute many runs to plot how accuracy changes in respect to ba
1. Execute the runs with some config parameter that allows you to identify them easily, like tags:

```bash
python train.py -m logger=csv datamodule.batch_size=16,32,64,128 tags=["batch_size_exp"]
python train.py -m logger=csv data.batch_size=16,32,64,128 tags=["batch_size_exp"]
```

2. Write a script or notebook that searches over the `logs/` folder and retrieves csv logs from runs containing given tags in config. Plot the results.
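   A rough sketch of such a script is shown below — it assumes the default CSVLogger layout under `logs/`, the composed config saved by Hydra to `.hydra/config.yaml`, and the `val/acc` metric from the MNIST example; adjust names and paths to your runs:

   ```python
   # hypothetical helper, not part of the template: collect metrics from tagged runs and plot them
   from pathlib import Path

   import matplotlib.pyplot as plt
   import pandas as pd
   import yaml

   results = []
   for cfg_path in Path("logs").rglob(".hydra/config.yaml"):
       cfg = yaml.safe_load(cfg_path.read_text())
       if "batch_size_exp" not in (cfg.get("tags") or []):
           continue  # keep only runs launched with tags=["batch_size_exp"]
       run_dir = cfg_path.parent.parent
       metrics_files = sorted(run_dir.glob("csv/**/metrics.csv"))
       if not metrics_files:
           continue
       metrics = pd.read_csv(metrics_files[0])
       results.append(
           {
               "batch_size": cfg["data"]["batch_size"],
               "best_val_acc": metrics["val/acc"].dropna().max(),
           }
       )

   df = pd.DataFrame(results).sort_values("batch_size")
   df.plot(x="batch_size", y="best_val_acc", marker="o")
   plt.show()
   ```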
@@ -814,7 +786,7 @@ hydra:
# define hyperparameter search space
params:
model.optimizer.lr: interval(0.0001, 0.1)
datamodule.batch_size: choice(32, 64, 128, 256)
data.batch_size: choice(32, 64, 128, 256)
model.net.lin1_size: choice(64, 128, 256)
model.net.lin2_size: choice(64, 128, 256)
model.net.lin3_size: choice(32, 64, 128, 256)
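
Once the search space is defined, the sweep is typically launched with Hydra's multirun flag, e.g. `python train.py -m hparams_search=mnist_optuna experiment=example` — this follows the pattern documented for this template, but verify the config names against your own repo.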
@@ -866,7 +838,7 @@ The simplest way is to pass datamodule attribute directly to model on initializa

```python
# ./src/train.py
datamodule = hydra.utils.instantiate(config.datamodule)
datamodule = hydra.utils.instantiate(config.data)
model = hydra.utils.instantiate(config.model, some_param=datamodule.some_param)
```

@@ -876,7 +848,7 @@ Similarly, you can pass a whole datamodule config as an init parameter:

```python
# ./src/train.py
model = hydra.utils.instantiate(config.model, dm_conf=config.datamodule, _recursive_=False)
model = hydra.utils.instantiate(config.model, dm_conf=config.data, _recursive_=False)
```

You can also pass a datamodule config parameter to your model through variable interpolation:
@@ -885,7 +857,7 @@

```yaml
# ./configs/model/my_model.yaml
_target_: src.models.my_module.MyLitModule
lr: 0.01
some_param: ${datamodule.some_param}
some_param: ${data.some_param}
```

Another approach is to access datamodule in LightningModule directly through Trainer:
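The corresponding snippet is collapsed in the diff above; a minimal illustrative sketch of the idea (class and attribute names are placeholders):

```python
# illustrative sketch — not the collapsed snippet from the diff
from pytorch_lightning import LightningModule


class MyLitModule(LightningModule):
    def on_train_start(self) -> None:
        # the Trainer keeps a reference to the datamodule it was fit with,
        # so datamodule attributes can be read at runtime
        self.some_param = self.trainer.datamodule.some_param
```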
@@ -1139,7 +1111,7 @@ So any file can be easily imported into any other file like so:

```python
from project_name.models.mnist_module import MNISTLitModule
from project_name.datamodules.mnist_datamodule import MNISTDataModule
from project_name.data.mnist_datamodule import MNISTDataModule
```

</details>
@@ -1187,7 +1159,6 @@ Other useful repositories:
- [jxpress/lightning-hydra-template-vertex-ai](https://github.com/jxpress/lightning-hydra-template-vertex-ai) - lightning-hydra-template integration with Vertex AI hyperparameter tuning and custom training job
- [pytorch/hydra-torch](https://github.com/pytorch/hydra-torch) - safely configuring PyTorch classes with Hydra
- [romesco/hydra-lightning](https://github.com/romesco/hydra-lightning) - safely configuring PyTorch Lightning classes with Hydra
- [PyTorchLightning/lightning-transformers](https://github.com/PyTorchLightning/lightning-transformers) - official Lightning Transformers repo built with Hydra

</details>

Expand Down Expand Up @@ -1286,5 +1257,5 @@ python src/train.py experiment=experiment_name.yaml
You can override any parameter from command line like this

```bash
python src/train.py trainer.max_epochs=20 datamodule.batch_size=64
python src/train.py trainer.max_epochs=20 data.batch_size=64
```
2 changes: 1 addition & 1 deletion configs/datamodule/mnist.yaml → configs/data/mnist.yaml
@@ -1,4 +1,4 @@
_target_: src.datamodules.mnist_datamodule.MNISTDataModule
_target_: src.data.mnist_datamodule.MNISTDataModule
data_dir: ${paths.data_dir}
batch_size: 128
train_val_test_split: [55_000, 5_000, 10_000]
2 changes: 1 addition & 1 deletion configs/debug/default.yaml
@@ -30,6 +30,6 @@ trainer:
devices: 1 # debuggers don't like multiprocessing
detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor

datamodule:
data:
num_workers: 0 # debuggers don't like multiprocessing
pin_memory: False # disable gpu memory pin
2 changes: 1 addition & 1 deletion configs/eval.yaml
@@ -2,7 +2,7 @@

defaults:
- _self_
- datamodule: mnist.yaml # choose datamodule with `test_dataloader()` for evaluation
- data: mnist.yaml # choose datamodule with `test_dataloader()` for evaluation
- model: mnist.yaml
- logger: null
- trainer: default.yaml
4 changes: 2 additions & 2 deletions configs/experiment/example.yaml
@@ -4,7 +4,7 @@
# python train.py experiment=example

defaults:
- override /datamodule: mnist.yaml
- override /data: mnist.yaml
- override /model: mnist.yaml
- override /callbacks: default.yaml
- override /trainer: default.yaml
@@ -29,7 +29,7 @@ model:
lin2_size: 256
lin3_size: 64

datamodule:
data:
batch_size: 64

logger:
2 changes: 1 addition & 1 deletion configs/hparams_search/mnist_optuna.yaml
@@ -46,7 +46,7 @@ hydra:
# define hyperparameter search space
params:
model.optimizer.lr: interval(0.0001, 0.1)
datamodule.batch_size: choice(32, 64, 128, 256)
data.batch_size: choice(32, 64, 128, 256)
model.net.lin1_size: choice(64, 128, 256)
model.net.lin2_size: choice(64, 128, 256)
model.net.lin3_size: choice(32, 64, 128, 256)
2 changes: 1 addition & 1 deletion configs/paths/default.yaml
@@ -1,6 +1,6 @@
# path to root directory
# this requires PROJECT_ROOT environment variable to exist
# PROJECT_ROOT is inferred and set by pyrootutils package in `train.py` and `eval.py`
# you can replace it with "." if you want the root to be the current working directory
root_dir: ${oc.env:PROJECT_ROOT}

# path to data directory
2 changes: 1 addition & 1 deletion configs/train.yaml
@@ -4,7 +4,7 @@
# order of defaults determines the order in which configs override each other
defaults:
- _self_
- datamodule: mnist.yaml
- data: mnist.yaml
- model: mnist.yaml
- callbacks: default.yaml
- logger: null # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
File renamed without changes.
File renamed without changes.
@@ -128,11 +128,4 @@ def load_state_dict(self, state_dict: Dict[str, Any]):


if __name__ == "__main__":
import hydra
import omegaconf
import pyrootutils

root = pyrootutils.setup_root(__file__, pythonpath=True)
cfg = omegaconf.OmegaConf.load(root / "configs" / "datamodule" / "mnist.yaml")
cfg.data_dir = str(root / "data")
_ = hydra.utils.instantiate(cfg)
_ = MNISTDataModule()
