diff --git a/src/power_grid_model_io/data_stores/csv_dir_store.py b/src/power_grid_model_io/data_stores/csv_dir_store.py
new file mode 100644
index 00000000..18f4f27c
--- /dev/null
+++ b/src/power_grid_model_io/data_stores/csv_dir_store.py
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model IO project
+#
+# SPDX-License-Identifier: MPL-2.0
+"""
+CSV Directory Store
+"""
+
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pandas as pd
+
+from power_grid_model_io.data_stores.base_data_store import BaseDataStore
+from power_grid_model_io.data_types import LazyDataFrame, TabularData
+
+
+class CsvDirStore(BaseDataStore[TabularData]):
+    """
+    CSV Directory Store
+
+    Each .csv file in the directory is treated as one table, named after the file (without the extension).
+
+    The first row of each .csv file is expected to contain the column names, unless specified differently by an
+    extension of this class.
+    """
+
+    __slots__ = ("_dir_path", "_csv_kwargs", "_header_rows")
+
+    def __init__(self, dir_path: Path, **csv_kwargs):
+        """
+        Args:
+            dir_path: The directory that contains (or will contain) the .csv files
+            **csv_kwargs: Extra keyword arguments, passed on to both pd.read_csv() (in load) and
+                DataFrame.to_csv() (in save)
+        """
+        super().__init__()
+        self._dir_path = Path(dir_path)
+        self._csv_kwargs: Dict[str, Any] = csv_kwargs
+        self._header_rows: List[int] = [0]
+
+    def load(self) -> TabularData:
+        """
+        Create a lazy loader for all CSV files in a directory and store them in a TabularData instance.
+
+        No file is read here; each file is only parsed when its loader is called.
+
+        Returns:
+            A TabularData instance with one lazy loader per .csv file, keyed by the file name without extension
+        """
+
+        def lazy_csv_loader(csv_path: Path) -> LazyDataFrame:
+            # Passing csv_path as an argument gives each loader its own path (no late binding to the loop variable)
+            def csv_loader():
+                return pd.read_csv(filepath_or_buffer=csv_path, header=self._header_rows, **self._csv_kwargs)
+
+            return csv_loader
+
+        # glob() yields files in arbitrary, file system dependent order; sort them for a reproducible table order
+        csv_paths = sorted(self._dir_path.glob("*.csv"))
+        data: Dict[str, LazyDataFrame] = {path.stem: lazy_csv_loader(path) for path in csv_paths}
+
+        return TabularData(**data)
+
+    def save(self, data: TabularData) -> None:
+        """
+        Store each table in data as a separate CSV file, named <table_name>.csv
+
+        The directory is created first if it does not exist yet.
+
+        Args:
+            data: The tables to store
+        """
+        self._dir_path.mkdir(parents=True, exist_ok=True)
+        for table_name, table_data in data.items():
+            table_data.to_csv(path_or_buf=self._dir_path / f"{table_name}.csv", **self._csv_kwargs)
diff --git a/src/power_grid_model_io/data_stores/excel_file_store.py b/src/power_grid_model_io/data_stores/excel_file_store.py
index 58adfe2a..4beaafd2 100644
--- a/src/power_grid_model_io/data_stores/excel_file_store.py
+++ b/src/power_grid_model_io/data_stores/excel_file_store.py
@@ -12,8 +12,7 @@
 import pandas as pd
 
 from power_grid_model_io.data_stores.base_data_store import BaseDataStore
-from power_grid_model_io.data_types import TabularData
-from power_grid_model_io.data_types.tabular_data import LazyDataFrame
+from power_grid_model_io.data_types import LazyDataFrame, TabularData
 
 
 class ExcelFileStore(BaseDataStore[TabularData]):
diff --git a/src/power_grid_model_io/data_types/__init__.py b/src/power_grid_model_io/data_types/__init__.py
index a0a45c0a..2cd7fb7a 100644
--- a/src/power_grid_model_io/data_types/__init__.py
+++ b/src/power_grid_model_io/data_types/__init__.py
@@ -6,4 +6,4 @@
 """
 
 from power_grid_model_io.data_types._data_types import ExtraInfo, ExtraInfoLookup, StructuredData
-from power_grid_model_io.data_types.tabular_data import TabularData
+from power_grid_model_io.data_types.tabular_data import LazyDataFrame, TabularData
diff --git a/tests/unit/data_stores/test_csv_dir_store.py b/tests/unit/data_stores/test_csv_dir_store.py
new file mode 100644
index 00000000..a6ca754c
--- /dev/null
+++ b/tests/unit/data_stores/test_csv_dir_store.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project
+#
+# SPDX-License-Identifier: MPL-2.0
+
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+import pytest
+
+from power_grid_model_io.data_stores.csv_dir_store import CsvDirStore
+from power_grid_model_io.data_types import TabularData
+
+
+@pytest.fixture()
+def temp_dir():
+    """Yield the resolved path of a temporary directory, which is removed again after the test"""
+    with tempfile.TemporaryDirectory() as tmp:
+        yield Path(tmp).resolve()
+
+
+def touch(file_path: Path):
+    """Create an empty file at file_path"""
+    file_path.touch()
+
+
+@patch("power_grid_model_io.data_stores.csv_dir_store.pd.read_csv")
+def test_load(mock_read_csv: MagicMock, temp_dir: Path):
+    # Arrange: empty files are sufficient, as pd.read_csv is mocked
+    foo_data, bar_data = MagicMock(), MagicMock()
+    touch(temp_dir / "foo.csv")
+    touch(temp_dir / "bar.csv")
+    mock_read_csv.side_effect = (foo_data, bar_data)
+    csv_dir = CsvDirStore(temp_dir, bla=True)
+
+    # Act
+    csv_data = csv_dir.load()
+
+    # Assert
+    mock_read_csv.assert_not_called()  # The csv data is not yet loaded
+    # The tables are looked up in the same order as the mocked return values (foo, then bar)
+    assert csv_data["foo"] == foo_data
+    assert csv_data["bar"] == bar_data
+    mock_read_csv.assert_any_call(filepath_or_buffer=temp_dir / "foo.csv", header=[0], bla=True)
+    mock_read_csv.assert_any_call(filepath_or_buffer=temp_dir / "bar.csv", header=[0], bla=True)
+
+
+@patch("power_grid_model_io.data_stores.csv_dir_store.pd.DataFrame.to_csv")
+def test_save(mock_to_csv: MagicMock, temp_dir: Path):
+    # Arrange
+    data = TabularData(foo=pd.DataFrame(), bar=pd.DataFrame())
+    csv_dir = CsvDirStore(temp_dir, bla=True)
+
+    # Act
+    csv_dir.save(data)
+
+    # Assert
+    mock_to_csv.assert_any_call(path_or_buf=temp_dir / "foo.csv", bla=True)
+    mock_to_csv.assert_any_call(path_or_buf=temp_dir / "bar.csv", bla=True)