diff --git a/guide.ipynb b/guide.ipynb
index ba1e3a2..ce2119c 100644
--- a/guide.ipynb
+++ b/guide.ipynb
@@ -15,13 +15,12 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 2,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "%load_ext autoreload\n",
-    "%autoreload 2"
+    "Reproducible code for \"A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME\" by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)\n",
+    "\n",
+    "Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)"
    ]
   },
   {
@@ -57,7 +56,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -80,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -96,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -116,7 +115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -129,11 +128,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Save the predictions\n",
+    "# Save the predictions as a .nc file\n",
+    "## Pass save_dir argument to specify path and filename format (year+month+.txt will be appended)\n",
+    "## Otherwise default is used, saved in the data/emulated_fps folder\n",
     "predictions.save_predictions(info)"
    ]
   },
diff --git a/train_emulator.py b/train_emulator.py
index 9887e18..7fab35e 100644
--- a/train_emulator.py
+++ b/train_emulator.py
@@ -5,6 +5,13 @@
 
 import argparse
 
+"""
+Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" 
+by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)
+
+Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)
+"""
+
 parser = argparse.ArgumentParser(prog = "tree_emulator", description='Train footprint emulator')
 parser.add_argument('site', type=str,  help='Site to train on, as string (eg "MHD")')
 parser.add_argument('year', help='Time period to train on. Can be int (2016) or str ("201[4-5]")')
@@ -61,4 +68,3 @@
 
     
 
-
diff --git a/trees_emulator/load_data.py b/trees_emulator/load_data.py
index 975583d..cdf2c6b 100644
--- a/trees_emulator/load_data.py
+++ b/trees_emulator/load_data.py
@@ -3,6 +3,13 @@
 import glob
 import dask
 
+"""
+Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" 
+by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)
+
+Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)
+"""
+
 class LoadData:
     """
     Load data for training and testing, for a particular site
diff --git a/trees_emulator/predicting.py b/trees_emulator/predicting.py
index 125cfd9..4d18620 100644
--- a/trees_emulator/predicting.py
+++ b/trees_emulator/predicting.py
@@ -15,6 +15,14 @@
 import sklearn.metrics as metrics
 import xarray as xr
 
+"""
+Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" 
+by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)
+
+Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)
+"""
+
+
 class MakePredictions:
     """
     Make footprint predictions
@@ -99,9 +107,12 @@ def save_predictions(self, info, save_dir=None, year=None):
 
 
     def predict_fluxes(self, flux, units_transform = "default"):
-        ## convolute predicted footprints and fluxes, returns two np arrays, one with the true flux and one with the emulated flux, of shape (n_footprints,)
-        ## flux is an array, regridded and cut to the same resolution and size of the footprints
-        ## units_transform can be None (use fluxes directly), "default" (performs flux*1e3 / CH4molarmass) or another function (which should return an array of the same shape as the original flux)
+        """
+        Convolve the predicted footprints with the fluxes.
+        Returns two np arrays, one with the true flux and one with the emulated flux, both of shape (n_footprints,).
+        The input flux should be an array, regridded and cut to the same resolution and size as the footprints.
+        units_transform can be None (use fluxes directly), "default" (performs flux*1e3 / CH4molarmass) or a function (which should return an array of the same shape as the original flux).
+        """
         shape = self.size 
         if units_transform != None:
             if units_transform == "default":
diff --git a/trees_emulator/training.py b/trees_emulator/training.py
index 4d9c810..43a6247 100644
--- a/trees_emulator/training.py
+++ b/trees_emulator/training.py
@@ -1,10 +1,25 @@
 import numpy as np
 from sklearn.ensemble import GradientBoostingRegressor
 
+"""
+Reproducible code for "A machine learning emulator for Lagrangian particle dispersion model footprints: A case study using NAME" 
+by Elena Fillola, Raul Santos-Rodriguez, Alistair Manning, Simon O'Doherty and Matt Rigby (2022)
+
+Author: Elena Fillola (elena.fillolamayoral@bristol.ac.uk)
+"""
+
 def train_tree(data, inputs, frequency, hours_back, tree):
     """
-    Train a GBRT (ie the regressor for a single cell) using parameters provided. Returns a trained sklearn regressor
-
+    Train a GBRT (i.e. the regressor for a single cell) using the parameters provided, following the method from the paper.
+    Returns a trained sklearn regressor.
+    
+    Required inputs:
+    - data (LoadData object)
+    - inputs (meteorological inputs)
+    - frequency (frequency at which to sample footprints)
+    - hours_back (same parameter as used to produce inputs)
+    - tree (int ID of the cell to train, counting cells as if the domain were flattened; tree is then unraveled into x,y coordinates within the domain, e.g. tree 0 is at location 0,0)
+    
     Parallel predicting function is predict_tree. Edit/replace both of these for custom training/predicting 
     """