diff --git a/guide.ipynb b/guide.ipynb index ba1e3a2..ce2119c 100644 --- a/guide.ipynb +++ b/guide.ipynb @@ -15,13 +15,12 @@ ] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "%load_ext autoreload\n", - "%autoreload 2" + "Reproducible code for \"A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME\" by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)\n", + "\n", + "Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)" ] }, { @@ -57,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -80,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -96,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -116,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -129,11 +128,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "# Save the predictions\n", + "# Save the predictions as a .nc file\n", + "## Pass save_dir argument to specify path and filename format (year+month+.txt will be appended)\n", + "## Otherwise default is used, saved in the data/emulated_fps folder\n", "predictions.save_predictions(info)" ] }, diff --git a/train_emulator.py b/train_emulator.py index 9887e18..7fab35e 100644 --- a/train_emulator.py +++ b/train_emulator.py @@ -5,6 +5,13 @@ import argparse +""" +Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" +by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022) + +Author: Elena Fillola 
(elena.fillolamayoral@bristol.ac.uk) +""" + parser = argparse.ArgumentParser(prog = "tree_emulator", description='Train footprint emulator') parser.add_argument('site', type=str, help='Site to train on, as string (eg "MHD")') parser.add_argument('year', help='Time period to train on. Can be int (2016) or str ("201[4-5]")') @@ -61,4 +68,3 @@ - diff --git a/trees_emulator/load_data.py b/trees_emulator/load_data.py index 975583d..cdf2c6b 100644 --- a/trees_emulator/load_data.py +++ b/trees_emulator/load_data.py @@ -3,6 +3,13 @@ import glob import dask +""" +Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" +by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022) + +Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk) +""" + class LoadData: """ Load data for training and testing, for a particular site diff --git a/trees_emulator/predicting.py b/trees_emulator/predicting.py index 125cfd9..4d18620 100644 --- a/trees_emulator/predicting.py +++ b/trees_emulator/predicting.py @@ -15,6 +15,14 @@ import sklearn.metrics as metrics import xarray as xr +""" +Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" +by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022) + +Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk) +""" + + class MakePredictions: """ Make footprint predictions @@ -99,9 +107,12 @@ def save_predictions(self, info, save_dir=None, year=None): def predict_fluxes(self, flux, units_transform = "default"): - ## convolute predicted footprints and fluxes, returns two np arrays, one with the true flux and one with the emulated flux, of shape (n_footprints,) - ## flux is an array, regridded and cut to the same resolution and size of the footprints - ## units_transform can be None (use fluxes directly), "default" (performs 
flux*1e3 / CH4molarmass) or another function (which should return an array of the same shape as the original flux) + """ + Convolve predicted footprints and fluxes. + Returns two np arrays, one with the true flux and one with the emulated flux, both of shape (n_footprints,) + input flux should be an array, regridded and cut to the same resolution and size of the footprints + units_transform can be None (use fluxes directly), "default" (performs flux*1e3 / CH4molarmass) or another function (which should return an array of the same shape as the original flux) + """ shape = self.size if units_transform != None: if units_transform == "default": diff --git a/trees_emulator/training.py b/trees_emulator/training.py index 4d9c810..43a6247 100644 --- a/trees_emulator/training.py +++ b/trees_emulator/training.py @@ -1,10 +1,25 @@ import numpy as np from sklearn.ensemble import GradientBoostingRegressor +""" +Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" +by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022) + +Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk) +""" + def train_tree(data, inputs, frequency, hours_back, tree): """ - Train a GBRT (ie the regressor for a single cell) using parameters provided. Returns a trained sklearn regressor - + Train a GBRT (ie the regressor for a single cell) using parameters provided, following the method from the paper. + Returns a trained sklearn regressor + + Requires inputs: + - data (LoadData object) + - inputs (meteorological inputs) + - frequency (freq to sample footprints) + - hours_back (same parameter as used to produce inputs) + - tree (int ID of cell to train, if all cells are flattened. tree is then unraveled into x,y coordinates within the domain, eg tree 0 is at loc 0,0) + Parallel predicting function is predict_tree. Edit/replace both of these for custom training/predicting """