Merge pull request #46 from SocialFinanceDigitalLabs/44-add-adjusted-…

…forecast-scenario-page 44 add adjusted forecast scenario page
SocialFinanceDigitalLabs · Jun 26, 2024 · 03d1699 · 03d1699
2 parents 8b1ada6 + ca03c34
commit 03d1699
Show file tree

Hide file tree

Showing 20 changed files with 1,491 additions and 28 deletions.
diff --git a/dm_regional_app/builder.py b/dm_regional_app/builder.py
@@ -44,8 +44,9 @@ def scenario(
  name: Optional[str] = None,
  description: Optional[str] = None,
  historic_filters: Optional[str] = None,
- prediction_filters: Optional[str] = None,
  prediction_parameters: Optional[str] = None,
+ adjusted_rates: Optional[str] = None,
+ adjusted_numbers: Optional[str] = None,
  historic_stock: Optional[str] = None,
  adjusted_costs: Optional[str] = None,
  **kwargs,
@@ -55,8 +56,9 @@ def scenario(
  name=name or self.fake.name(),
  description=description or self.fake.text(),
  historic_filters=historic_filters or self.fake.text(),
- prediction_filters=prediction_filters or self.fake.text(),
  prediction_parameters=prediction_parameters or self.fake.text(),
+ adjusted_rates=adjusted_rates or self.fake.text(),
+ adjusted_numbers=adjusted_numbers or self.fake.text(),
  historic_stock=historic_stock or self.fake.text(),
  adjusted_costs=adjusted_costs or self.fake.text(),
  **kwargs,

diff --git a/dm_regional_app/charts.py b/dm_regional_app/charts.py
@@ -13,7 +13,9 @@ def prediction_chart(historic_data: PopulationStats, prediction: Prediction, **k
 
  # dataframe containing total children in prediction
  df = prediction.population.unstack().reset_index()
+
  df.columns = ["from", "date", "forecast"]
+ df = df[df["from"].apply(lambda x: "Not in care" in x) == False]
  df = df[["date", "forecast"]].groupby(by="date").sum().reset_index()
  df["date"] = pd.to_datetime(df["date"]).dt.date
 
@@ -97,7 +99,7 @@ def prediction_chart(historic_data: PopulationStats, prediction: Prediction, **k
  )
 
  fig.update_layout(
- title="Base forecast", xaxis_title="Date", yaxis_title="Number of children"
+ title="Forecast", xaxis_title="Date", yaxis_title="Number of children"
  )
  fig.update_yaxes(rangemode="tozero")
  fig_html = fig.to_html(full_html=False)
@@ -123,3 +125,233 @@ def historic_chart(data: PopulationStats):
  fig.update_yaxes(rangemode="tozero")
  fig_html = fig.to_html(full_html=False)
  return fig_html
+
+
def transition_rate_table(data):
    """Build the display table of base transition rates between states.

    Args:
        data: Rates carrying a two-level index named ``"from"`` and ``"to"``
            with exactly one value per transition (e.g. a Series). The input
            is not modified.

    Returns:
        A DataFrame that keeps the ``("from", "to")`` index and has the
        columns ``"From"``, ``"To"`` and ``"Base transition rate"``. Rows
        whose destination contains ``"Not in care"`` and rows where the state
        does not change are removed, values are rounded to 4 decimal places,
        and rows are ordered by ``"From"``. Only the first row of each
        ``"From"`` group shows the label; repeats are blanked to ``""``.
    """
    df = data.reset_index()
    # Keep readable copies of the index levels as ordinary columns; the
    # originals go back into the index below.
    df["From"] = df["from"]
    df["To"] = df["to"]
    df = df.set_index(["from", "to"])

    leaves_care = df["To"].map(lambda state: "Not in care" in state).astype(bool)
    unchanged = df["From"] == df["To"]
    df = df[~(leaves_care | unchanged)].round(4)

    # A stable sort keeps the incoming order of rows inside each "From"
    # group, so the rendered table is deterministic.
    df = df.sort_values(by="From", kind="mergesort")
    df["From"] = df["From"].mask(df["From"].duplicated(), "")

    # Label columns first, then the single value column.
    value_columns = [col for col in df.columns if col not in ("From", "To")]
    df = df[["From", "To"] + value_columns]
    df.columns = ["From", "To", "Base transition rate"]

    return df
+
+
def exit_rate_table(data):
    """Build the display table of base exit rates (transitions out of care).

    Args:
        data: Rates carrying a two-level index named ``"from"`` and ``"to"``
            with exactly one value per transition (e.g. a Series). ``"from"``
            labels are expected to look like ``"<age group> - <placement>"``.
            The input is not modified.

    Returns:
        A DataFrame that keeps the ``("from", "to")`` index and has the
        columns ``"Age Group"``, ``"Placement"`` and ``"Base exit rate"``.
        Only rows whose destination contains ``"Not in care"`` are kept and
        values are rounded to 4 decimal places. Every repeat of an age group
        after its first occurrence is blanked to ``""``.
    """
    df = data.reset_index()

    is_exit = df["to"].map(lambda state: "Not in care" in state).astype(bool)
    # Copy so the column inserts below act on an independent frame rather
    # than on a slice of the reset frame.
    df = df[is_exit].copy()

    # Split on the first separator only, so a placement name that itself
    # contains " - " stays intact. Reindexing guarantees both columns exist
    # even when there are no rows or no separator.
    parts = df["from"].str.split(" - ", n=1, expand=True).reindex(columns=[0, 1])
    df.insert(0, "Placement", parts[1])
    df.insert(0, "Age Group", parts[0])

    df = df.set_index(["from", "to"]).round(4)

    # NOTE(review): rows are not sorted here, so this assumes rows of the same
    # age group arrive next to each other - confirm against the caller.
    df["Age Group"] = df["Age Group"].mask(df["Age Group"].duplicated(), "")

    df.columns = ["Age Group", "Placement", "Base exit rate"]

    return df
+
+
def entry_rate_table(data):
    """Build the display table of base entry rates (entries into care).

    Args:
        data: Rates with a single-level index of destination labels and
            exactly one value per label (e.g. a Series). Labels are expected
            to look like ``"<age group> - <placement>"``. The index may be
            named or unnamed. The input is not modified.

    Returns:
        A DataFrame indexed by ``"to"`` with the columns ``"Age Group"``,
        ``"Placement"`` and ``"Base entry rate"``. Rows whose label contains
        ``"Not in care"`` are removed and values are rounded to 4 decimal
        places. Every repeat of an age group after its first occurrence is
        blanked to ``""``.
    """
    # Name the index explicitly instead of relying on reset_index() emitting
    # a column called "index", which only happens for an unnamed index.
    df = data.rename_axis("to").reset_index()

    in_care = ~df["to"].map(lambda state: "Not in care" in state).astype(bool)
    # Copy so the column inserts below act on an independent frame rather
    # than on a slice of the reset frame.
    df = df[in_care].copy()

    # Split on the first separator only, so a placement name that itself
    # contains " - " stays intact. Reindexing guarantees both columns exist
    # even when there are no rows or no separator.
    parts = df["to"].str.split(" - ", n=1, expand=True).reindex(columns=[0, 1])
    df.insert(0, "Placement", parts[1])
    df.insert(0, "Age Group", parts[0])

    df = df.set_index("to").round(4)

    # NOTE(review): rows are not sorted here, so this assumes rows of the same
    # age group arrive next to each other - confirm against the caller.
    df["Age Group"] = df["Age Group"].mask(df["Age Group"].duplicated(), "")

    df.columns = ["Age Group", "Placement", "Base entry rate"]

    return df
+
+
def _forecast_totals(prediction):
    """Return per-date totals of a forecast population, excluding "Not in care"."""
    totals = prediction.population.unstack().reset_index()
    totals.columns = ["from", "date", "forecast"]
    in_care = ~totals["from"].map(lambda state: "Not in care" in state).astype(bool)
    totals = totals[in_care]
    totals = totals[["date", "forecast"]].groupby(by="date").sum().reset_index()
    totals["date"] = pd.to_datetime(totals["date"]).dt.date
    return totals


def _forecast_interval(prediction, totals):
    """Return per-date summed variance plus upper/lower bounds around ``totals``."""
    band = prediction.variance.unstack().reset_index()
    band.columns = ["bin", "date", "variance"]
    band = band[["date", "variance"]].groupby(by="date").sum().reset_index()
    band["date"] = pd.to_datetime(band["date"]).dt.date
    # Both frames carry a fresh 0..n-1 index after reset_index(), so this
    # arithmetic pairs rows by position.
    # NOTE(review): this assumes the population and variance frames cover the
    # same dates - confirm. The summed variance is used directly as the
    # half-width of the band.
    band["upper"] = totals["forecast"] + band["variance"]
    band["lower"] = totals["forecast"] - band["variance"]
    return band


def compare_forecast(
    historic_data: PopulationStats,
    base_forecast: Prediction,
    adjusted_forecast: Prediction,
    **kwargs
):
    """Render a chart comparing the base and adjusted forecasts with history.

    Args:
        historic_data: Source of the historic ``stock`` that is summed per
            date.
        base_forecast: Forecast whose ``population`` and ``variance`` give
            the base line and its interval.
        adjusted_forecast: Forecast whose ``population`` and ``variance``
            give the adjusted line and its interval.
        **kwargs: Must contain ``reference_start_date`` and
            ``reference_end_date``, which bound the shaded reference period.
            Other keys are ignored.

    Returns:
        The figure as an HTML fragment (``full_html=False``).

    Raises:
        KeyError: If either reference date is missing from ``kwargs``.
    """
    # pop start and end dates to visualise reference period
    reference_start_date = kwargs.pop("reference_start_date")
    reference_end_date = kwargs.pop("reference_end_date")

    # total children per date in historic data
    historic = historic_data.stock.unstack().reset_index()
    historic.columns = ["from", "date", "historic"]
    historic = historic[["date", "historic"]].groupby(by="date").sum().reset_index()
    historic["date"] = pd.to_datetime(historic["date"]).dt.date

    # totals and confidence intervals for the base and adjusted forecasts
    base_totals = _forecast_totals(base_forecast)
    base_interval = _forecast_interval(base_forecast, base_totals)
    adjusted_totals = _forecast_totals(adjusted_forecast)
    adjusted_interval = _forecast_interval(adjusted_forecast, adjusted_totals)

    fig = go.Figure()

    # Adjusted confidence interval as a filled shape: an invisible lower
    # bound, then the upper bound filled down to it ("tonexty").
    fig.add_trace(
        go.Scatter(
            x=adjusted_interval["date"],
            y=adjusted_interval["lower"],
            line_color="rgba(255,255,255,0)",
            name="Adjusted confidence interval",
            showlegend=False,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=adjusted_interval["date"],
            y=adjusted_interval["upper"],
            fill="tonexty",
            fillcolor="rgba(255,140,0,0.2)",
            line_color="rgba(255,255,255,0)",
            name="Adjusted confidence interval",
            showlegend=True,
        )
    )

    # Base confidence interval, drawn the same way
    fig.add_trace(
        go.Scatter(
            x=base_interval["date"],
            y=base_interval["lower"],
            line_color="rgba(255,255,255,0)",
            name="Base confidence interval",
            showlegend=False,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=base_interval["date"],
            y=base_interval["upper"],
            fill="tonexty",
            fillcolor="rgba(0,176,246,0.2)",
            line_color="rgba(255,255,255,0)",
            name="Base confidence interval",
            showlegend=True,
        )
    )

    # add adjusted forecast for total children
    fig.add_trace(
        go.Scatter(
            x=adjusted_totals["date"],
            y=adjusted_totals["forecast"],
            name="Adjusted Forecast",
            line=dict(color="black", width=1.5, dash="dash"),
        )
    )

    # add base forecast for total children
    fig.add_trace(
        go.Scatter(
            x=base_totals["date"],
            y=base_totals["forecast"],
            name="Base Forecast",
            line=dict(color="black", width=1.5),
        )
    )

    # add historic data for total children
    fig.add_trace(
        go.Scatter(
            x=historic["date"],
            y=historic["historic"],
            name="Historic data",
            line=dict(color="black", width=1.5, dash="dot"),
        )
    )

    # add shaded reference period
    fig.add_shape(
        type="rect",
        xref="x",
        yref="paper",
        x0=reference_start_date,
        y0=0,
        x1=reference_end_date,
        y1=1,
        line=dict(
            width=0,
        ),
        label=dict(
            text="Reference period", textposition="top center", font=dict(size=14)
        ),
        fillcolor="rgba(105,105,105,0.1)",
        layer="above",
    )

    fig.update_layout(
        title="Forecast", xaxis_title="Date", yaxis_title="Number of children"
    )
    fig.update_yaxes(rangemode="tozero")
    fig_html = fig.to_html(full_html=False)
    return fig_html
diff --git a/dm_regional_app/forms.py b/dm_regional_app/forms.py
@@ -1,9 +1,12 @@
+import pandas as pd
 from bootstrap_datepicker_plus.widgets import DatePickerInput
 from crispy_forms.helper import FormHelper
 from crispy_forms.layout import Column, Layout, Row, Submit
 from django import forms
 from django_select2 import forms as s2forms
 
+from dm_regional_app.utils import str_to_tuple
+
 
 class PredictFilter(forms.Form):
  reference_start_date = forms.DateField(
@@ -127,3 +130,62 @@ def __init__(self, *args, **kwargs):
  ),
  Submit("submit", "Filter"),
  )
+
+
class DynamicForm(forms.Form):
    """Form with one optional float field per row of a rate table.

    Keyword Args:
        dataframe: Object whose ``index`` supplies one field per entry; each
            field is named ``str(index_entry)``.
        initial_data: Optional pandas object indexed like ``dataframe``. When
            it has a value for an entry, that value pre-fills the field.
            ``None`` or omitted leaves every field blank.
    """

    def __init__(self, *args, **kwargs):
        self.dataframe = kwargs.pop("dataframe", None)
        # Omitted and None are handled the same way, so no empty placeholder
        # Series is needed as the default.
        initial_data = kwargs.pop("initial_data", None)

        super().__init__(*args, **kwargs)
        self.initialize_fields(initial_data)

    def initialize_fields(self, initial_data):
        """Create one optional ``FloatField`` per entry of ``self.dataframe.index``."""
        for index in self.dataframe.index:
            initial_value = None
            # initial_data is None when nothing has been adjusted before.
            # Membership is checked first because a tuple lookup on a
            # flat-indexed Series raises IndexingError rather than KeyError.
            if initial_data is not None and index in initial_data.index:
                initial_value = initial_data.loc[index]

            self.fields[str(index)] = forms.FloatField(
                required=False, initial=initial_value
            )

    def save(self):
        """Collect the entered values into a Series keyed by transition.

        Returns:
            A float Series holding one value per field the user filled in.
            Field names are converted with ``str_to_tuple``; when every key
            is a tuple the index is a ``("from", "to")`` MultiIndex,
            otherwise a flat index named ``"transition"``.
        """
        transitions = []
        adjusted_rates = []
        for field_name, value in self.cleaned_data.items():
            # Blank fields clean to None. An explicit 0.0 is a real entry and
            # must be kept, so test for None rather than truthiness.
            if value is not None:
                transitions.append(str_to_tuple(field_name))
                adjusted_rates.append(value)

        # if every key is a tuple, build a MultiIndex
        if all(isinstance(key, tuple) for key in transitions):
            index = pd.MultiIndex.from_tuples(transitions, names=["from", "to"])
        else:
            index = pd.Index(transitions, name="transition")

        return pd.Series(adjusted_rates, index=index, dtype=float)