From 547575e1869db7f35a8e688c0a09312159366423 Mon Sep 17 00:00:00 2001 From: Francisco Macedo Date: Thu, 30 May 2024 14:59:30 +0100 Subject: [PATCH] cleanup: move config to python code --- accounts/admin.py | 1 - accounts/backends.py | 3 +- d.py | 120 +++++++++++++++ dm_regional_app/views.py | 15 +- ssda903/__init__.py | 2 - ssda903/config/__init__.py | 7 +- ssda903/config/_age_brackets.py | 157 ++++++++++--------- ssda903/config/_configuration_loader.py | 107 ------------- ssda903/config/_configuration_source.py | 47 ------ ssda903/config/_costs.py | 77 +++++++--- ssda903/config/_placement_categories.py | 166 ++++++++++++++------- ssda903/datacontainer.py | 42 +++--- ssda903/fixtures/config/__init__.py | 0 ssda903/fixtures/config/standard-v1.yaml | 135 ----------------- ssda903/population_stats.py | 37 +++-- ssda903/predictor.py | 5 +- ssda903/reader.py | 4 +- ssda903/{fixtures => tests}/__init__.py | 0 ssda903/tests/test_age_brackets.py | 65 ++++++++ ssda903/tests/test_costs.py | 29 ++++ ssda903/tests/test_placement_categories.py | 30 ++++ 21 files changed, 546 insertions(+), 503 deletions(-) create mode 100644 d.py delete mode 100644 ssda903/config/_configuration_loader.py delete mode 100644 ssda903/config/_configuration_source.py delete mode 100644 ssda903/fixtures/config/__init__.py delete mode 100644 ssda903/fixtures/config/standard-v1.yaml rename ssda903/{fixtures => tests}/__init__.py (100%) create mode 100644 ssda903/tests/test_age_brackets.py create mode 100644 ssda903/tests/test_costs.py create mode 100644 ssda903/tests/test_placement_categories.py diff --git a/accounts/admin.py b/accounts/admin.py index 0c72ff2..9dd04f7 100755 --- a/accounts/admin.py +++ b/accounts/admin.py @@ -4,7 +4,6 @@ from django.contrib.auth.admin import UserAdmin as DjangoUserAdmin from django.utils.translation import gettext_lazy as _ - from .models import CustomUser diff --git a/accounts/backends.py b/accounts/backends.py index 5e9bab9..dc5f2c0 100755 --- a/accounts/backends.py 
+++ b/accounts/backends.py @@ -1,6 +1,5 @@ -from django.contrib.auth.backends import ModelBackend from django.contrib.auth import get_user_model - +from django.contrib.auth.backends import ModelBackend UserModel = get_user_model() diff --git a/d.py b/d.py new file mode 100644 index 0000000..63238f0 --- /dev/null +++ b/d.py @@ -0,0 +1,120 @@ +import logging +from dataclasses import dataclass +from enum import Enum +from typing import Optional + +logger = logging.getLogger(__name__) + + +DEFAULT_START = -1 +DEFAULT_END = 30 + + +@dataclass +class AgeBracket: + index: int + _name: Optional[str] = None + start: Optional[int] = DEFAULT_START + end: Optional[int] = DEFAULT_END + _length_in_days: Optional[int] = None + + @property + def length_in_days(self): + if self._length_in_days is not None: + return self._length_in_days + return (self.end - self.start) * 365 + + @property + def name(self): + if self._name is not None: + return self._name + return f"{self.start} to {self.end}" + + @property + def daily_probability(self): + return 1 / self.length_in_days + + def __str__(self): + return self.name + + def __repr__(self): + return f"<{self.__class__.__name__}: {self.name}>" + + +class AgeBrackets(Enum): + """ + Age Brackets Enum. 
+ + to get a age bracket dataclass: + ab = AgeBrackets.BIRTH_TO_ONE.value + # ab = + + + if you do AgeBrackets.BIRTH_TO_ONE that's actually the Enum member, not the AgeBracket: + not_ab = AgeBrackets.BIRTH_TO_ONE + # not_ab = != + + + to get a placement category name: + pc_name = PlacementCategories.FOSTERING.value.name + # pc = "Fostering" + + if you do PlacementCategories.FOSTERING.name that's actually the name of the Enum member, not the name of the PlacementCategory: + not_pc_name = PlacementCategories.FOSTERING.name + # not_pc_name = "FOSTERING" != "Fostering" + + """ + + BIRTH_TO_ONE = AgeBracket(_name="Birth to 1", end=1, index=0, _length_in_days=365) + ONE_TO_FIVE = AgeBracket(start=1, end=5, index=1) + FIVE_TO_TEN = AgeBracket(start=5, end=10, index=2) + TEN_TO_SIXTEEN = AgeBracket(start=10, end=16, index=3) + SIXTEEN_TO_EIGHTEEN = AgeBracket(_name="16 to 18+", start=16, index=4) + + @classmethod + def values(cls) -> list[AgeBracket]: + return [a.value for a in cls._members_by_index()] + + @classmethod + def _members_by_index(cls) -> list["AgeBrackets"]: + """ + Returns a list of all members of the enum ordered by index. + """ + return sorted(list(cls.__members__.values()), key=lambda x: x.value.index) + + @property + def next(self) -> Optional["AgeBrackets"]: + """ + Returns the next AgeBrackets in the enum. + If the current AgeBrackets is the last one, returns None. + """ + members = self._members_by_index() + current_index = members.index(self) + if current_index == len(members) - 1: + return None + return members[current_index + 1] + + @property + def previous(self) -> Optional["AgeBrackets"]: + """ + Returns the previous AgeBrackets in the enum. + If the current AgeBrackets is the first one, returns None. 
+ """ + members = self._members_by_index() + current_index = members.index(self) + if current_index == 0: + return None + return members[current_index - 1] + + @classmethod + def bracket_for_age(cls, age: float) -> Optional[AgeBracket]: + for bracket in cls: + if bracket.value.start <= age < bracket.value.end: + return bracket.value + return None + + +d = AgeBrackets.BIRTH_TO_ONE.name + + +print(type(d)) diff --git a/dm_regional_app/views.py b/dm_regional_app/views.py index 2563402..3e08613 100644 --- a/dm_regional_app/views.py +++ b/dm_regional_app/views.py @@ -9,7 +9,7 @@ from dm_regional_app.forms import HistoricDataFilter, PredictFilter from dm_regional_app.models import SavedScenario, SessionScenario from dm_regional_app.utils import apply_filters -from ssda903 import Config +from ssda903.config import PlacementCategories from ssda903.population_stats import PopulationStats from ssda903.predictor import predict from ssda903.reader import read_data @@ -146,8 +146,7 @@ def prediction(request): else: empty_dataframe = False - config = Config() - stats = PopulationStats(historic_data, config) + stats = PopulationStats(historic_data) # Call predict function with default dates prediction = predict( @@ -181,7 +180,6 @@ def historic_data(request): if "session_scenario_id" in request.session: pk = request.session["session_scenario_id"] session_scenario = get_object_or_404(SessionScenario, pk=pk) - config = Config() # read data datacontainer = read_data(source=settings.DATA_SOURCE) @@ -218,16 +216,13 @@ def historic_data(request): data = apply_filters(datacontainer.enriched_view, form.initial) entry_into_care_count = data.loc[ - data.placement_type_before - == datacontainer.config.PlacementCategories.NOT_IN_CARE + data.placement_type_before == PlacementCategories.NOT_IN_CARE.value.label ]["CHILD"].nunique() exiting_care_count = data.loc[ - data.placement_type_after - == datacontainer.config.PlacementCategories.NOT_IN_CARE + data.placement_type_after == 
PlacementCategories.NOT_IN_CARE.value.label ]["CHILD"].nunique() - config = Config() - stats = PopulationStats(data, config) + stats = PopulationStats(data) chart = historic_chart(stats) diff --git a/ssda903/__init__.py b/ssda903/__init__.py index 656b899..b54e563 100644 --- a/ssda903/__init__.py +++ b/ssda903/__init__.py @@ -1,4 +1,3 @@ -from .config import Config from .datacontainer import DemandModellingDataContainer from .datastore import StorageDataStore from .population_stats import PopulationStats @@ -6,6 +5,5 @@ __all__ = [ "DemandModellingDataContainer", "PopulationStats", - "Config", "StorageDataStore", ] diff --git a/ssda903/config/__init__.py b/ssda903/config/__init__.py index e5ccdbe..757f27b 100644 --- a/ssda903/config/__init__.py +++ b/ssda903/config/__init__.py @@ -1,3 +1,6 @@ -from ._configuration_loader import Config +from ._age_brackets import AgeBrackets +from ._costs import Costs +from ._placement_categories import PlacementCategories -__all__ = ["Config"] +YEAR_IN_DAYS = 365.24 +__all__ = ["AgeBrackets", "PlacementCategories", "Costs", "YEAR_IN_DAYS"] diff --git a/ssda903/config/_age_brackets.py b/ssda903/config/_age_brackets.py index 187ff03..ef190d0 100644 --- a/ssda903/config/_age_brackets.py +++ b/ssda903/config/_age_brackets.py @@ -1,96 +1,115 @@ import logging +from dataclasses import dataclass from enum import Enum -from typing import Any, Dict, Optional, Tuple - -from ._placement_categories import PlacementCategories +from typing import Optional logger = logging.getLogger(__name__) -class AgeBrackets(Enum): - def __init__(self, config): - PlacementCategories = config.get("placement_categories", []) - logger.debug("Configuring AgeBracket with %s", config) - - self.__index = config["index"] - self.__start = config.get("min", -1) - self.__end = config.get("max", 30) - self.__label = config.get("label", f"{self.start} to {self.end}") - self.__length_in_days = config.get( - "length_in_days", (self.end - self.start) * 365 - ) - 
self.__placement_categories = tuple( - [PlacementCategories[cat] for cat in config.get("categories", [])] - + [PlacementCategories.OTHER] - ) - self.__index = config.get("index", 0) - self._value_ = self.__label +DEFAULT_START = -1 +DEFAULT_END = 30 - @property - def start(self) -> int: - return self.__start - @property - def end(self) -> int: - return self.__end +@dataclass +class AgeBracket: + index: int + _label: Optional[str] = None + start: Optional[int] = DEFAULT_START + end: Optional[int] = DEFAULT_END + _length_in_days: Optional[int] = None @property - def placement_categories(self) -> Tuple[PlacementCategories]: - return self.__placement_categories + def length_in_days(self): + if self._length_in_days is not None: + return self._length_in_days + return (self.end - self.start) * 365 @property def label(self): - return self.__label + if self._label is not None: + return self._label + return f"{self.start} to {self.end}" @property - def index(self): - return self.__index + def daily_probability(self): + return 1 / self.length_in_days - @property - def next(self): - return type(self).for_index(self.index + 1) + def __str__(self): + return self.label - @property - def previous(self): - return type(self).for_index(self.index - 1) + def __repr__(self): + return f"<{self.__class__.__name__}: {self.label}>" - @property - def length_in_days(self): - return self.__length_in_days - @property - def daily_probability(self): - return 1 / self.length_in_days +class AgeBrackets(Enum): + """ + Age Brackets Enum. 
- @classmethod - def bracket_for_age(cls, age: float) -> Optional["AgeBrackets"]: - for bracket in cls: - if bracket.start <= age < bracket.end: - return bracket - return None + to get an age bracket dataclass: + ab = AgeBrackets.BIRTH_TO_ONE.value + ab will be <AgeBracket: Birth to 1> - @classmethod - def for_index(cls, index: int) -> Optional["AgeBrackets"]: - for bracket in cls: - if bracket.index == index: - return bracket - return None - def __str__(self): - return self.label + if you do AgeBrackets.BIRTH_TO_ONE that's actually the Enum member, not the AgeBracket: + not_ab = AgeBrackets.BIRTH_TO_ONE + not_ab will be <AgeBrackets.BIRTH_TO_ONE: <AgeBracket: Birth to 1>> + which is not the same as <AgeBracket: Birth to 1> - def __repr__(self): - return f"<{self.__class__.__name__}.{self.name}: {self.label}>" + to get an age bracket label: + ab_label = AgeBrackets.BIRTH_TO_ONE.value.label + ab_label will be "Birth to 1" - def __lt__(self, other): - return self.__index < other.__index + if you do AgeBrackets.BIRTH_TO_ONE.name that's actually the name of the Enum member, not the label of the AgeBracket: + not_ab_label = AgeBrackets.BIRTH_TO_ONE.name + not_ab_label will be "BIRTH_TO_ONE" + which is not the same as "Birth to 1" -def build_age_brackets(config: Dict[str, Any], placement_categories=None): - config = config.copy() - for ix, v in enumerate(config.values()): - v["index"] = ix - if placement_categories: - v["placement_categories"] = placement_categories + BIRTH_TO_ONE = AgeBracket(_label="Birth to 1", end=1, index=0, _length_in_days=365) + ONE_TO_FIVE = AgeBracket(start=1, end=5, index=1) + FIVE_TO_TEN = AgeBracket(start=5, end=10, index=2) + TEN_TO_SIXTEEN = AgeBracket(start=10, end=16, index=3) + SIXTEEN_TO_EIGHTEEN = AgeBracket(_label="16 to 18+", start=16, index=4) + + @classmethod + def values(cls) -> list[AgeBracket]: + return [a.value for a in cls._members_by_index()] - return AgeBrackets("AgeBrackets", config) + @classmethod + def _members_by_index(cls) -> list["AgeBrackets"]: + """ + Returns a list of all members of the enum ordered by index. 
+ """ + return sorted(list(cls.__members__.values()), key=lambda x: x.value.index) + + @property + def next(self) -> Optional["AgeBrackets"]: + """ + Returns the next AgeBrackets in the enum. + If the current AgeBrackets is the last one, returns None. + """ + members = self._members_by_index() + current_index = members.index(self) + if current_index == len(members) - 1: + return None + return members[current_index + 1] + + @property + def previous(self) -> Optional["AgeBrackets"]: + """ + Returns the previous AgeBrackets in the enum. + If the current AgeBrackets is the first one, returns None. + """ + members = self._members_by_index() + current_index = members.index(self) + if current_index == 0: + return None + return members[current_index - 1] + + @classmethod + def bracket_for_age(cls, age: float) -> Optional[AgeBracket]: + for bracket in cls: + if bracket.value.start <= age < bracket.value.end: + return bracket.value + return None diff --git a/ssda903/config/_configuration_loader.py b/ssda903/config/_configuration_loader.py deleted file mode 100644 index 88926b1..0000000 --- a/ssda903/config/_configuration_loader.py +++ /dev/null @@ -1,107 +0,0 @@ -from pathlib import Path -from typing import Callable, NamedTuple - -import pandas as pd -import yaml - -from ..fixtures import config as config_fixtures -from ._age_brackets import AgeBrackets, build_age_brackets -from ._configuration_source import ConfigurationSource -from ._costs import Costs -from ._placement_categories import PlacementCategories, build_placement_categories - -DEFAULT_CONFIG_PATH = Path(config_fixtures.__file__).parent / "standard-v1.yaml" - - -class State(NamedTuple): - age_bin: AgeBrackets - placement_type: PlacementCategories - - -class Transition(NamedTuple): - age_bin: AgeBrackets - placement_type: PlacementCategories - placement_type_after: PlacementCategories - - -def multi_index(source): - source = list(source) - if len(source) == 0: - raise ValueError("No data to index") - return 
pd.MultiIndex.from_tuples(source, names=source[0]._fields) - - -def supports_index(func: Callable) -> Callable: - def wrapper(self, *args, **kwargs): - as_index = kwargs.pop("as_index", False) - if as_index: - return multi_index(func(self, *args, **kwargs)) - else: - return func(self, *args, **kwargs) - - return wrapper - - -class ConfigMeta(type): - """ - We create proxy properties for the attributes of the ConfigurationSource, just to make access a bit easier. - """ - - def __new__(mcs, name, bases, dct): - for key in ["name", "description", "version", "year_in_days", "src"]: - dct[key] = property(lambda self, k=key: getattr(self._config, k)) - x = super().__new__(mcs, name, bases, dct) - return x - - -class Config(metaclass=ConfigMeta): - def __init__(self, src: str = DEFAULT_CONFIG_PATH): - with open(src, "rt") as file: - config = yaml.safe_load(file) - - self._config = ConfigurationSource(config) - self._placements_categories = build_placement_categories( - self._config.placement_categories - ) - self._age_brackets = build_age_brackets( - self._config.age_brackets, self._placements_categories - ) - self._costs = Costs(self) - - @property - def config(self): - return self._config - - @property - def costs(self): - return self._costs - - @property - def AgeBrackets(self) -> AgeBrackets: - return self._age_brackets - - @property - def PlacementCategories(self) -> PlacementCategories: - return self._placements_categories - - @supports_index - def states(self) -> State: - for age_bin in self.AgeBrackets: - categories = age_bin.placement_categories - for pt1 in categories: - yield State(age_bin, pt1) - - @supports_index - def transitions( - self, not_in_care=False, self_transitions=True, other_transitions=True - ) -> Transition: - for age_bin in self.AgeBrackets: - from_categories = to_categories = age_bin.placement_categories - if not_in_care: - to_categories = to_categories + (self.PlacementCategories.NOT_IN_CARE,) - for pt1 in from_categories: - for pt2 in 
to_categories: - if (pt1 == pt2 and self_transitions) or ( - pt1 != pt2 and other_transitions - ): - yield Transition(age_bin, pt1, pt2) diff --git a/ssda903/config/_configuration_source.py b/ssda903/config/_configuration_source.py deleted file mode 100644 index a4fdcda..0000000 --- a/ssda903/config/_configuration_source.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Any, Mapping - - -class ConfigurationSource: - """ - This class allows access to the key properties of a dictionary of configuration values. - - It is here to allow us to change configuration and maintain backwards compatibility. - - It is used by the factory methods to actually create the program values. - """ - - def __init__(self, config: Mapping[str, Any]): - self._config = config - self._config_config = config["config"] - - @property - def name(self): - return self._config["name"] - - @property - def description(self): - return self._config["description"] - - @property - def version(self): - return self._config["version"] - - @property - def age_brackets(self): - return self._config_config["AgeBrackets"] - - @property - def placement_categories(self): - return self._config_config["PlacementCategories"] - - @property - def costs(self): - return self._config_config["costs"] - - @property - def year_in_days(self): - return self._config_config["YearInDays"] - - @property - def src(self): - return self._config diff --git a/ssda903/config/_costs.py b/ssda903/config/_costs.py index e9399d5..6aa5706 100644 --- a/ssda903/config/_costs.py +++ b/ssda903/config/_costs.py @@ -1,6 +1,9 @@ from dataclasses import dataclass +from enum import Enum from typing import Generator +from ssda903.config._placement_categories import PlacementCategories, PlacementCategory + @dataclass class CostDefaults: @@ -10,17 +13,14 @@ class CostDefaults: @dataclass class CostItem: - id: str label: str - category: "PlacementCategory" + category: PlacementCategory defaults: CostDefaults def toJSON(self) -> dict: return { - "id": 
self.id, "label": self.label, "category": { - "name": self.category.name, "label": self.category.label, }, "defaults": { @@ -30,26 +30,57 @@ def toJSON(self) -> dict: } -def _parse_config(config: "Config") -> Generator[CostItem, None, None]: - for key, value in config.config.costs.items(): - category = config.PlacementCategories[value["category"]] - defaults = CostDefaults(**value["defaults"]) - yield CostItem(key, value["label"], category, defaults) - - -class Costs: - def __init__(self, config: "Config"): - self.costs = {cost.id: cost for cost in _parse_config(config)} - - def __getitem__(self, key): - return self.costs[key] +class Costs(Enum): + FOSTER_FRIEND_RELATION = CostItem( + label="Fostering (Friend/Relative)", + category=PlacementCategories.FOSTERING.value, + defaults=CostDefaults(cost_per_day=100, proportion=1), + ) + FOSTER_IN_HOUSE = CostItem( + label="Fostering (In-house)", + category=PlacementCategories.FOSTERING.value, + defaults=CostDefaults(cost_per_day=150, proportion=1), + ) + FOSTER_IFA = CostItem( + label="Fostering (IFA)", + category=PlacementCategories.FOSTERING.value, + defaults=CostDefaults(cost_per_day=250, proportion=1), + ) + RESIDENTIAL_IN_HOUSE = CostItem( + label="Residential (In-house)", + category=PlacementCategories.RESIDENTIAL.value, + defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + RESIDENTIAL_EXTERNAL = CostItem( + label="Residential (External)", + category=PlacementCategories.RESIDENTIAL.value, + defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + SUPPORTED = CostItem( + label="Supported accomodation", + category=PlacementCategories.SUPPORTED.value, + defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + SECURE_HOME = CostItem( + label="Secure home", + category=PlacementCategories.OTHER.value, + defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + PLACED_WITH_FAMILY = CostItem( + label="Placed with family", + category=PlacementCategories.OTHER.value, + 
defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + OTHER = CostItem( + label="Other", + category=PlacementCategories.OTHER.value, + defaults=CostDefaults(cost_per_day=1000, proportion=1), + ) + @classmethod def by_category( - self, category: "PlacementCategory" + cls, category: "PlacementCategory" ) -> Generator[CostItem, None, None]: - for cost in self.costs.values(): - if cost.category == category: + for cost in cls: + if cost.value.category == category: yield cost - - def __iter__(self): - return iter(self.costs.values()) diff --git a/ssda903/config/_placement_categories.py b/ssda903/config/_placement_categories.py index dfe6da4..5135400 100644 --- a/ssda903/config/_placement_categories.py +++ b/ssda903/config/_placement_categories.py @@ -1,67 +1,121 @@ -import json import logging -from enum import Enum, EnumMeta -from typing import Any, Dict +from dataclasses import dataclass +from enum import Enum logger = logging.getLogger(__name__) -class PlacementCategoriesMeta(EnumMeta): - def __new__(mcs, name, bases, dct, **kwargs): - cls = super().__new__(mcs, name, bases, dct, **kwargs) +@dataclass +class PlacementCategory: + label: str + placement_types: tuple + index: int = 0 - cls.__labels = {p.label: p for p in cls} - - mappings = {t: p for p in cls for t in p.placement_types} - cls.__placement_type_map = {t: mappings[t] for t in sorted(mappings.keys())} - - return cls - - @property - def labels(cls): - return cls.__labels - - @property - def placement_type_map(cls): - return cls.__placement_type_map - - -class PlacementCategories(Enum, metaclass=PlacementCategoriesMeta): - def __init__(self, config): - logger.debug("Configuring PlacementCategories with %s", config) - - self.__label = config.get("label", self.name.capitalize()) - self.__placement_types = tuple(config.get("placement_types", [])) - self.__index = config.get("index", 0) - self._value_ = self.__label - - @property - def label(self): - return self.__label - - @property - def 
placement_types(self): - return self.__placement_types + def __lt__(self, other: "PlacementCategory"): + if not isinstance(other, PlacementCategory): + raise TypeError( + f"Cannot compare {self.__class__.__name__} with {other.__class__.__name__}" + ) + return self.index < other.index def __str__(self): return self.label def __repr__(self): - return f"<{self.__class__.__name__}.{self.name}: {self.label}>" - - def __lt__(self, other): - return self.__index < other.__index - - -def build_placement_categories(config: Dict[str, Any]): - config = config.copy() - for ix, v in enumerate(config.values()): - v["index"] = ix - - if "OTHER" not in config: - config["OTHER"] = {"label": "Other"} - - if "NOT_IN_CARE" not in config: - config["NOT_IN_CARE"] = {"label": "Not in care"} - - return PlacementCategories("PlacementCategories", config) + return f"<{self.__class__.__name__}: {self.label}>" + + +class PlacementCategories(Enum): + """ + Placement Categories Enum. + + to get a placement category dataclass: + pc = PlacementCategories.FOSTERING.value + pc will be <PlacementCategory: Fostering> + + + if you do PlacementCategories.FOSTERING that's actually the Enum member, not the PlacementCategory: + not_pc = PlacementCategories.FOSTERING + not_pc will be <PlacementCategories.FOSTERING: <PlacementCategory: Fostering>> + which is not the same as <PlacementCategory: Fostering> + + + to get a placement category label: + pc_label = PlacementCategories.FOSTERING.value.label + pc_label will be "Fostering" + + if you do PlacementCategories.FOSTERING.name that's actually the name of the Enum member, not the label of the PlacementCategory: + not_pc_label = PlacementCategories.FOSTERING.name + not_pc_label will be "FOSTERING" + which is not the same as "Fostering" + + """ + + FOSTERING = PlacementCategory( + label="Fostering", + placement_types=( + "U1", + "U2", + "U3", + "U4", + "U5", + "U6", + ), + index=0, + ) + + RESIDENTIAL = PlacementCategory( + label="Residential", + placement_types=( + "K2", + "R1", + ), + index=1, + ) + + SUPPORTED = PlacementCategory( + label="Supported", + placement_types=( + "H5", + 
"P2", + ), + index=2, + ) + OTHER = PlacementCategory( + label="Other", + placement_types=(), + index=3, + ) + + NOT_IN_CARE = PlacementCategory( + label="Not in care", + placement_types=(), + index=4, + ) + + @classmethod + def values(cls) -> list[PlacementCategory]: + return [a.value for a in cls._members_by_index()] + + @classmethod + def _members_by_index(cls) -> list["PlacementCategories"]: + """ + Returns a list of all members of the enum ordered by index. + """ + return sorted(list(cls.__members__.values()), key=lambda x: x.value.index) + + @classmethod + def get_placement_type_map(cls) -> dict[str, PlacementCategory]: + """ + return a dictionary of placement types to placement categories + for example: + { + "U1": , + "U2": , + ... + "K2": , + "R1": , + ... + } + """ + return {pt: pc for pc in cls.values() for pt in pc.placement_types} diff --git a/ssda903/datacontainer.py b/ssda903/datacontainer.py index ed79c4a..38d863c 100644 --- a/ssda903/datacontainer.py +++ b/ssda903/datacontainer.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from ssda903.config import Config +from ssda903.config import YEAR_IN_DAYS, AgeBrackets, PlacementCategories from ssda903.data.ssda903 import SSDA903TableType from ssda903.datastore import DataFile, DataStore, TableType @@ -20,9 +20,8 @@ class DemandModellingDataContainer: merging data to create a single, consistent dataset. 
""" - def __init__(self, datastore: DataStore, config: Config): + def __init__(self, datastore: DataStore): self.__datastore = datastore - self.__config = config self.__file_info = [] for file_info in datastore.files: @@ -43,10 +42,6 @@ def __init__(self, datastore: DataStore, config: Config): def file_info(self): return self.__file_info - @property - def config(self) -> Config: - return self.__config - def _detect_table_type(self, file_info: DataFile) -> Optional[TableType]: """ Detect the table type of a file by reading the first line of the file and looking for a @@ -212,12 +207,8 @@ def _add_ages(self, combined: pd.DataFrame) -> pd.DataFrame: WARNING: This method modifies the dataframe in place. """ - combined["age"] = ( - combined["DECOM"] - combined["DOB"] - ).dt.days / self.__config.year_in_days - combined["end_age"] = ( - combined["DEC"] - combined["DOB"] - ).dt.days / self.__config.year_in_days + combined["age"] = (combined["DECOM"] - combined["DOB"]).dt.days / YEAR_IN_DAYS + combined["end_age"] = (combined["DEC"] - combined["DOB"]).dt.days / YEAR_IN_DAYS return combined def _add_age_bins(self, combined: pd.DataFrame) -> pd.DataFrame: @@ -226,9 +217,15 @@ def _add_age_bins(self, combined: pd.DataFrame) -> pd.DataFrame: WARNING: This method modifies the dataframe in place. """ - AgeBracket = self.__config.AgeBrackets - combined["age_bin"] = combined["age"].apply(AgeBracket.bracket_for_age) - combined["end_age_bin"] = combined["end_age"].apply(AgeBracket.bracket_for_age) + + def get_age_bracket_label(age): + age_bracket = AgeBrackets.bracket_for_age(age) + if age_bracket is not None: + return age_bracket.label + return None + + combined["age_bin"] = combined["age"].apply(get_age_bracket_label) + combined["end_age_bin"] = combined["end_age"].apply(get_age_bracket_label) return combined def _add_related_placement_type( @@ -239,14 +236,13 @@ def _add_related_placement_type( WARNING: This method modifies the dataframe in place. 
""" - PlacementCategories = self.__config.PlacementCategories combined = combined.sort_values(["CHILD", "DECOM", "DEC"], na_position="first") combined[new_column_name] = ( combined.groupby("CHILD")["placement_type"] .shift(offset) - .fillna(PlacementCategories.NOT_IN_CARE) + .fillna(PlacementCategories.NOT_IN_CARE.value.label) ) offset_mask = combined["CHILD"] == combined["CHILD"].shift(offset) @@ -254,7 +250,9 @@ def _add_related_placement_type( offset_mask &= combined["DECOM"] != combined["DEC"].shift(offset) else: offset_mask &= combined["DEC"] != combined["DECOM"].shift(offset) - combined.loc[offset_mask, new_column_name] = PlacementCategories.NOT_IN_CARE + combined.loc[ + offset_mask, new_column_name + ] = PlacementCategories.NOT_IN_CARE.value.label return combined def _add_placement_category(self, combined: pd.DataFrame) -> pd.DataFrame: @@ -263,10 +261,8 @@ def _add_placement_category(self, combined: pd.DataFrame) -> pd.DataFrame: WARNING: This method modifies the dataframe in place. """ - PlacementCategories = self.__config.PlacementCategories + placement_type_map = PlacementCategories.get_placement_type_map() combined["placement_type"] = combined["PLACE"].apply( - lambda x: PlacementCategories.placement_type_map.get( - x, PlacementCategories.OTHER - ) + lambda x: placement_type_map.get(x, PlacementCategories.OTHER.value).label ) return combined diff --git a/ssda903/fixtures/config/__init__.py b/ssda903/fixtures/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ssda903/fixtures/config/standard-v1.yaml b/ssda903/fixtures/config/standard-v1.yaml deleted file mode 100644 index 7ebc079..0000000 --- a/ssda903/fixtures/config/standard-v1.yaml +++ /dev/null @@ -1,135 +0,0 @@ -name: Standard 903 Demand Modelling Configuration -version: 1.0 -description: | - Standard 903 Demand Modelling Configuration Baseline. - - This file is a YAML file that contains the configuration for the standard 903 demand modelling configuration. 
- -config: - YearInDays: 365.24 - - PlacementCategories: - FOSTERING: - label: Fostering - placement_types: - - U1 - - U2 - - U3 - - U4 - - U5 - - U6 - - RESIDENTIAL: - label: Residential - placement_types: - - K2 - - R1 - - SUPPORTED: - label: Supported - placement_types: - - H5 - - P2 - - AgeBrackets: - - BIRTH_TO_ONE: - label: Birth to 1 - max: 1 - length_in_days: 365 # This first bracket is a special case in that it only has a max year, but is still a year long - categories: - - FOSTERING - - ONE_TO_FIVE: - min: 1 - max: 5 - categories: - - FOSTERING - - FIVE_TO_TEN: - min: 5 - max: 10 - categories: - - FOSTERING - - RESIDENTIAL - - TEN_TO_SIXTEEN: - min: 10 - max: 16 - categories: - - FOSTERING - - RESIDENTIAL - - SIXTEEN_TO_EIGHTEEN: - label: 16 to 18+ - min: 16 - categories: - - FOSTERING - - RESIDENTIAL - - SUPPORTED - - - costs: - - foster_friend_relation: - label: Fostering (Friend/Relative) - category: FOSTERING - defaults: - cost_per_day: 100 - proportion: 1 - - foster_in_house: - label: Fostering (In-house) - category: FOSTERING - defaults: - cost_per_day: 150 - proportion: 1 - - foster_IFA: - label: Fostering (IFA) - category: FOSTERING - defaults: - cost_per_day: 250 - proportion: 1 - - resi_in_house: - label: Residential (In-house) - category: RESIDENTIAL - defaults: - cost_per_day: 1000 - proportion: 1 - - resi_external: - label: Residential (External) - category: RESIDENTIAL - defaults: - cost_per_day: 1000 - proportion: 1 - - supported: - label: Supported accomodation - category: SUPPORTED - defaults: - cost_per_day: 1000 - proportion: 1 - - secure_home: - label: Secure home - category: OTHER - defaults: - cost_per_day: 1000 - proportion: 1 - - placed_with_family: - label: Placed with family - category: OTHER - defaults: - cost_per_day: 1000 - proportion: 1 - - other: - label: Other - category: OTHER - defaults: - cost_per_day: 1000 - proportion: 1 diff --git a/ssda903/population_stats.py b/ssda903/population_stats.py index fcf98a0..4c971f2 
100644 --- a/ssda903/population_stats.py +++ b/ssda903/population_stats.py @@ -1,25 +1,19 @@ from datetime import date from functools import lru_cache -import numpy as np import pandas as pd -from ssda903.config import Config +from ssda903.config import AgeBrackets, PlacementCategories class PopulationStats: - def __init__(self, df: pd.DataFrame, config: Config): + def __init__(self, df: pd.DataFrame): self.__df = df - self.__config = config @property def df(self): return self.__df - @property - def config(self) -> Config: - return self.__config - @property def stock(self): """ @@ -28,7 +22,7 @@ def stock(self): day and then resampling to get the daily populations. """ df = self.df.copy() - df["bin"] = df.apply(lambda c: (c.age_bin.name, c.placement_type.name), axis=1) + df["bin"] = df.apply(lambda c: (c.age_bin, c.placement_type), axis=1) endings = df.groupby(["DEC", "bin"]).size() endings.name = "nof_decs" @@ -56,7 +50,6 @@ def stock(self): pops = pops.resample("D").first().fillna(method="ffill").fillna(0) # Add the missing age bins and fill with zeros - # pops = pops.T.reindex(self.__config.states(as_index=True)).T.fillna(0) return pops @@ -74,10 +67,10 @@ def stock_at(self, start_date) -> pd.Series: def transitions(self): transitions = self.df.copy() transitions["start_bin"] = transitions.apply( - lambda c: (c.age_bin.name, c.placement_type.name), axis=1 + lambda c: (c.age_bin, c.placement_type), axis=1 ) transitions["end_bin"] = transitions.apply( - lambda c: (c.age_bin.name, c.placement_type_after.name), axis=1 + lambda c: (c.age_bin, c.placement_type_after), axis=1 ) transitions = transitions.groupby(["start_bin", "end_bin", "DEC"]).size() transitions = ( @@ -112,16 +105,18 @@ def ageing_out(self) -> pd.Series: Returns the probability of ageing out from one bin to the other. 
""" ageing_out = [] - for age_group in self.config.AgeBrackets: - for pt in self.config.PlacementCategories: + for age_group in AgeBrackets: + for pt in PlacementCategories: next_name = ( - (age_group.next.name, pt.name) if age_group.next else tuple() + (age_group.next.value.label, pt.value.label) + if age_group.next + else tuple() ) ageing_out.append( { - "from": (age_group.name, pt.name), + "from": (age_group.value.label, pt.value.label), "to": next_name, - "rate": age_group.daily_probability, + "rate": age_group.value.daily_probability, } ) @@ -134,7 +129,6 @@ def daily_entrants(self, start_date: date, end_date: date) -> pd.Series: """ Returns the number of entrants and the daily_probability of entrants for each age bracket and placement type. """ - PlacementCategories = self.__config.PlacementCategories start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) @@ -142,11 +136,14 @@ def daily_entrants(self, start_date: date, end_date: date) -> pd.Series: # Only look at episodes starting in analysis period df = df[(df["DECOM"] >= start_date) & (df["DECOM"] <= end_date)].copy() - df["to"] = df.apply(lambda c: (c.age_bin.name, c.placement_type.name), axis=1) + df["to"] = df.apply(lambda c: (c.age_bin, c.placement_type), axis=1) # Group by age bin and placement type df = ( - df[df["placement_type_before"] == PlacementCategories.NOT_IN_CARE] + df[ + df["placement_type_before"] + == PlacementCategories.NOT_IN_CARE.value.label + ] .groupby(["to"]) .size() ) diff --git a/ssda903/predictor.py b/ssda903/predictor.py index 58ddfde..2fadc58 100644 --- a/ssda903/predictor.py +++ b/ssda903/predictor.py @@ -6,7 +6,7 @@ from demand_model import MultinomialPredictor from demand_model.multinomial.predictor import Prediction -from ssda903 import Config, PopulationStats +from ssda903 import PopulationStats def predict( @@ -19,8 +19,7 @@ def predict( """ Analyses source between start and end, and then predicts the population at prediction_date. 
""" - config = Config() - stats = PopulationStats(data, config) + stats = PopulationStats(data) if prediction_start_date is None: prediction_start_date = reference_end_date if prediction_end_date is None: diff --git a/ssda903/reader.py b/ssda903/reader.py index 99121c9..b476623 100644 --- a/ssda903/reader.py +++ b/ssda903/reader.py @@ -1,6 +1,5 @@ from django.core.files.storage import default_storage -from ssda903.config import Config from ssda903.datacontainer import DemandModellingDataContainer from ssda903.datastore import StorageDataStore @@ -10,6 +9,5 @@ def read_data(source) -> DemandModellingDataContainer: Read data from source and return a pandas DataFrame """ datastore = StorageDataStore(default_storage, source) - config = Config() - dc = DemandModellingDataContainer(datastore, config) + dc = DemandModellingDataContainer(datastore) return dc diff --git a/ssda903/fixtures/__init__.py b/ssda903/tests/__init__.py similarity index 100% rename from ssda903/fixtures/__init__.py rename to ssda903/tests/__init__.py diff --git a/ssda903/tests/test_age_brackets.py b/ssda903/tests/test_age_brackets.py new file mode 100644 index 0000000..bb7707e --- /dev/null +++ b/ssda903/tests/test_age_brackets.py @@ -0,0 +1,65 @@ +import unittest + +from ssda903.config._age_brackets import AgeBracket, AgeBrackets + + +class AgeBracketTestCase(unittest.TestCase): + def test_length_in_days(self): + age_bracket = AgeBracket(start=1, end=5, index=1) + self.assertEqual(age_bracket.length_in_days, 1460) + + def test_label(self): + age_bracket = AgeBracket(start=1, end=5, index=1) + self.assertEqual(age_bracket.label, "1 to 5") + + def test_daily_probability(self): + age_bracket = AgeBracket(start=1, end=5, index=1) + self.assertAlmostEqual(age_bracket.daily_probability, 0.0006849315068493151) + + +class AgeBracketsTestCase(unittest.TestCase): + def test_values(self): + expected_values = [ + AgeBracket(_label="Birth to 1", end=1, index=0, _length_in_days=365), + AgeBracket(start=1, end=5, 
index=1), + AgeBracket(start=5, end=10, index=2), + AgeBracket(start=10, end=16, index=3), + AgeBracket(_label="16 to 18+", start=16, index=4), + ] + self.assertEqual(AgeBrackets.values(), expected_values) + + def test_next(self): + self.assertEqual(AgeBrackets.BIRTH_TO_ONE.next, AgeBrackets.ONE_TO_FIVE) + self.assertEqual(AgeBrackets.ONE_TO_FIVE.next, AgeBrackets.FIVE_TO_TEN) + self.assertEqual(AgeBrackets.FIVE_TO_TEN.next, AgeBrackets.TEN_TO_SIXTEEN) + self.assertEqual( + AgeBrackets.TEN_TO_SIXTEEN.next, AgeBrackets.SIXTEEN_TO_EIGHTEEN + ) + self.assertIsNone(AgeBrackets.SIXTEEN_TO_EIGHTEEN.next) + + def test_previous(self): + self.assertIsNone(AgeBrackets.BIRTH_TO_ONE.previous) + self.assertEqual(AgeBrackets.ONE_TO_FIVE.previous, AgeBrackets.BIRTH_TO_ONE) + self.assertEqual(AgeBrackets.FIVE_TO_TEN.previous, AgeBrackets.ONE_TO_FIVE) + self.assertEqual(AgeBrackets.TEN_TO_SIXTEEN.previous, AgeBrackets.FIVE_TO_TEN) + self.assertEqual( + AgeBrackets.SIXTEEN_TO_EIGHTEEN.previous, AgeBrackets.TEN_TO_SIXTEEN + ) + + def test_bracket_for_age(self): + self.assertEqual( + AgeBrackets.bracket_for_age(0.5), AgeBrackets.BIRTH_TO_ONE.value + ) + self.assertEqual( + AgeBrackets.bracket_for_age(1.5), AgeBrackets.ONE_TO_FIVE.value + ) + self.assertEqual( + AgeBrackets.bracket_for_age(5.5), AgeBrackets.FIVE_TO_TEN.value + ) + self.assertEqual( + AgeBrackets.bracket_for_age(10.5), AgeBrackets.TEN_TO_SIXTEEN.value + ) + self.assertEqual( + AgeBrackets.bracket_for_age(16.5), AgeBrackets.SIXTEEN_TO_EIGHTEEN.value + ) + self.assertIsNone(AgeBrackets.bracket_for_age(50)) diff --git a/ssda903/tests/test_costs.py b/ssda903/tests/test_costs.py new file mode 100644 index 0000000..3b5e7f5 --- /dev/null +++ b/ssda903/tests/test_costs.py @@ -0,0 +1,29 @@ +import unittest + +from ssda903.config._costs import Costs +from ssda903.config._placement_categories import PlacementCategories + + +class TestCosts(unittest.TestCase): + def test_cost_item_to_json(self): + cost_item = 
Costs.FOSTER_FRIEND_RELATION.value + expected_json = { + "label": "Fostering (Friend/Relative)", + "category": { + "label": "Fostering", + }, + "defaults": { + "cost_per_day": 100, + "proportion": 1, + }, + } + self.assertEqual(cost_item.toJSON(), expected_json) + + def test_by_category(self): + foster_costs = list(Costs.by_category(PlacementCategories.FOSTERING.value)) + expected_foster_costs = [ + Costs.FOSTER_FRIEND_RELATION, + Costs.FOSTER_IN_HOUSE, + Costs.FOSTER_IFA, + ] + self.assertEqual(foster_costs, expected_foster_costs) diff --git a/ssda903/tests/test_placement_categories.py b/ssda903/tests/test_placement_categories.py new file mode 100644 index 0000000..853f1b3 --- /dev/null +++ b/ssda903/tests/test_placement_categories.py @@ -0,0 +1,30 @@ +import unittest + +from ssda903.config._placement_categories import PlacementCategories, PlacementCategory + + +class PlacementCategoriesTestCase(unittest.TestCase): + def test_values(self): + categories = PlacementCategories.values() + self.assertIsInstance(categories, list) + self.assertTrue( + all(isinstance(category, PlacementCategory) for category in categories) + ) + + def test_members_by_index(self): + members = PlacementCategories._members_by_index() + self.assertIsInstance(members, list) + self.assertTrue( + all(isinstance(member, PlacementCategories) for member in members) + ) + self.assertEqual(len(members), len(PlacementCategories)) + + def test_get_placement_type_map(self): + placement_type_map = PlacementCategories.get_placement_type_map() + self.assertIsInstance(placement_type_map, dict) + self.assertTrue( + all( + isinstance(key, str) and isinstance(value, PlacementCategory) + for key, value in placement_type_map.items() + ) + )