Skip to content

Commit

Permalink
...
Browse files Browse the repository at this point in the history
  • Loading branch information
svpino committed Apr 25, 2024
1 parent 1cc345f commit 6132940
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 22 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,6 @@ dmypy.json
.DS_Store
program/code/
_proc/
.metaflow
.metaflow
mlruns/
mlartifacts/
60 changes: 60 additions & 0 deletions program/metaflow/mlflow.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([4., 3.])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"metrics = [[3, 2], [3, 2], [6, 5]]\n",
"\n",
"# Compute the column-wise mean of metrics: one mean per position in the pairs\n",
"\n",
"\n",
"accuracy = np.mean(metrics, axis=0)\n",
"accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
3 changes: 2 additions & 1 deletion program/metaflow/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
metaflow
metaflow
mlflow
106 changes: 86 additions & 20 deletions program/metaflow/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def build_tuner_model(hp):
"keras": "3.3.0",
"jax[cpu]": "0.4.26",
"packaging": "24.0",
"mlflow": "2.12.1",
},
)
class TrainingFlow(FlowSpec):
Expand All @@ -125,6 +126,12 @@ class TrainingFlow(FlowSpec):

@step
def start(self):
import mlflow
from metaflow import current

run = mlflow.start_run(run_name=current.run_id)
self.mlflow_run_id = run.info.run_id

self.next(self.load_data)

@pypi(packages={"boto3": "1.34.70"})
Expand Down Expand Up @@ -215,23 +222,38 @@ def transform_features_fold(self):
@step
def train_model_fold(self):
"""Train a model as part of the cross-validation process."""
import mlflow

print(f"Training fold {self.fold}...")

self.model = build_model(10, 0.01)
with (
mlflow.start_run(run_id=self.mlflow_run_id),
mlflow.start_run(
run_name=f"cross-validation-fold-{self.fold}",
nested=True,
) as run,
):
self.mlflow_fold_run_id = run.info.run_id

self.model.fit(
self.x_train,
self.y_train,
epochs=50,
batch_size=32,
verbose=2,
)
mlflow.autolog()

self.model = build_model(10, 0.01)

self.model.fit(
self.x_train,
self.y_train,
epochs=50,
batch_size=32,
verbose=0,
)

self.next(self.evaluate_model_fold)

@step
def evaluate_model_fold(self):
"""Evaluate a model created as part of the cross-validation process."""
import mlflow

print(f"Evaluating fold {self.fold}...")

self.loss, self.accuracy = self.model.evaluate(
Expand All @@ -240,18 +262,41 @@ def evaluate_model_fold(self):
verbose=2,
)

with mlflow.start_run(run_id=self.mlflow_fold_run_id):
mlflow.log_metrics(
{
"test_loss": self.loss,
"test_accuracy": self.accuracy,
},
)

print(f"Fold {self.fold} - loss: {self.loss} - accuracy: {self.accuracy}")
self.next(self.evaluate_model)

@step
def evaluate_model(self, inputs):
import mlflow
import numpy as np

accuracies = [i.accuracy for i in inputs]
accuracy = np.mean(accuracies)
accuracy_std = np.std(accuracies)
self.merge_artifacts(inputs, include=["mlflow_run_id"])

metrics = [[i.accuracy, i.loss] for i in inputs]

print(f"Accuracy: {accuracy} +-{accuracy_std}")
accuracy, loss = np.mean(metrics, axis=0)
accuracy_std, loss_std = np.std(metrics, axis=0)

print(f"Accuracy: {accuracy} ±{accuracy_std}")
print(f"Loss: {loss} ±{loss_std}")

with mlflow.start_run(run_id=self.mlflow_run_id):
mlflow.log_metrics(
{
"cross_validation_accuracy": accuracy,
"cross_validation_accuracy_std": accuracy_std,
"cross_validation_loss": loss,
"cross_validation_loss_std": loss_std,
},
)

self.next(self.train_model)

Expand All @@ -261,17 +306,38 @@ def train_model(self, inputs):
This function will use the entire dataset to train the model.
"""
import mlflow
from mlflow.models import infer_signature

self.merge_artifacts(inputs)

self.model = build_model(10, 0.01)
with mlflow.start_run(run_id=self.mlflow_run_id):
mlflow.autolog()

self.model.fit(
self.x,
self.y,
epochs=50,
batch_size=32,
verbose=2,
)
self.model = build_model(10, 0.01)

params = {
"epochs": 50,
"batch_size": 32,
}

self.model.fit(
self.x,
self.y,
verbose=2,
**params,
)

mlflow.log_params(params)

signature = infer_signature(self.x, self.y)

mlflow.keras.log_model(
self.model,
"model",
signature=signature,
registered_model_name="penguins",
)

self.next(self.end)

Expand Down

0 comments on commit 6132940

Please sign in to comment.