Merge pull request #42 from alliander-opensource/feature/csv-dir-store
Feature/csv dir store
bramstoeller committed Mar 2, 2023
2 parents d758f0e + 6a3b435 commit 63b0b94
Showing 4 changed files with 117 additions and 3 deletions.
55 changes: 55 additions & 0 deletions src/power_grid_model_io/data_stores/csv_dir_store.py
@@ -0,0 +1,55 @@
# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model IO project <[email protected]>
#
# SPDX-License-Identifier: MPL-2.0
"""
CSV Directory Store
"""

from pathlib import Path
from typing import Any, Dict, List

import pandas as pd

from power_grid_model_io.data_stores.base_data_store import BaseDataStore
from power_grid_model_io.data_types import LazyDataFrame, TabularData


class CsvDirStore(BaseDataStore[TabularData]):
    """
    CSV Directory Store
    The first row of each .csv file is expected to contain the column names, unless specified differently by an
    extension of this class.
    """

    __slots__ = ("_dir_path", "_csv_kwargs", "_header_rows")

    def __init__(self, dir_path: Path, **csv_kwargs):
        super().__init__()
        self._dir_path = Path(dir_path)
        self._csv_kwargs: Dict[str, Any] = csv_kwargs
        self._header_rows: List[int] = [0]

    def load(self) -> TabularData:
        """
        Create a lazy loader for all CSV files in a directory and store them in a TabularData instance.
        """

        def lazy_csv_loader(csv_path: Path) -> LazyDataFrame:
            def csv_loader():
                return pd.read_csv(filepath_or_buffer=csv_path, header=self._header_rows, **self._csv_kwargs)

            return csv_loader

        data: Dict[str, LazyDataFrame] = {}
        for path in self._dir_path.glob("*.csv"):
            data[path.stem] = lazy_csv_loader(path)

        return TabularData(**data)

    def save(self, data: TabularData) -> None:
        """
        Store each table in data as a separate CSV file
        """
        for table_name, table_data in data.items():
            table_data.to_csv(path_or_buf=self._dir_path / f"{table_name}.csv", **self._csv_kwargs)
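
For orientation, a minimal usage sketch of the new store (the directory and table names are illustrative assumptions, not part of this diff):

from pathlib import Path

from power_grid_model_io.data_stores.csv_dir_store import CsvDirStore
from power_grid_model_io.data_types import TabularData

# Point the store at a directory that contains e.g. node.csv and line.csv
store = CsvDirStore(Path("data/my_grid"))

# load() only registers lazy loaders; nothing is read from disk yet
data: TabularData = store.load()

# The first access to a table triggers pd.read_csv for that one file
nodes = data["node"]

# save() writes each table back as <table_name>.csv in the same directory
store.save(data)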
3 changes: 1 addition & 2 deletions src/power_grid_model_io/data_stores/excel_file_store.py
@@ -12,8 +12,7 @@
import pandas as pd

from power_grid_model_io.data_stores.base_data_store import BaseDataStore
from power_grid_model_io.data_types import TabularData
from power_grid_model_io.data_types.tabular_data import LazyDataFrame
from power_grid_model_io.data_types import LazyDataFrame, TabularData


class ExcelFileStore(BaseDataStore[TabularData]):
2 changes: 1 addition & 1 deletion src/power_grid_model_io/data_types/__init__.py
@@ -6,4 +6,4 @@
"""

from power_grid_model_io.data_types._data_types import ExtraInfo, ExtraInfoLookup, StructuredData
from power_grid_model_io.data_types.tabular_data import TabularData
from power_grid_model_io.data_types.tabular_data import LazyDataFrame, TabularData
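
With this re-export, LazyDataFrame can be imported from the package root alongside TabularData. A short sketch (the loader below is a made-up example):

import pandas as pd

from power_grid_model_io.data_types import LazyDataFrame, TabularData


def load_nodes() -> pd.DataFrame:
    return pd.DataFrame({"id": [1, 2, 3]})


# As used in CsvDirStore, a LazyDataFrame is a zero-argument callable that
# returns a DataFrame; TabularData evaluates it on first access.
loader: LazyDataFrame = load_nodes
data = TabularData(node=loader)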
60 changes: 60 additions & 0 deletions tests/unit/data_stores/test_csv_dir_store.py
@@ -0,0 +1,60 @@
# SPDX-FileCopyrightText: 2022 Contributors to the Power Grid Model project <[email protected]>
#
# SPDX-License-Identifier: MPL-2.0

import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch

import pandas as pd
import pytest

from power_grid_model_io.data_stores.csv_dir_store import CsvDirStore
from power_grid_model_io.data_types import TabularData


@pytest.fixture()
def temp_dir():
    with tempfile.TemporaryDirectory() as tmp:
        yield Path(tmp).resolve()


def touch(file_path: Path):
    open(file_path, "wb").close()


@patch("power_grid_model_io.data_stores.csv_dir_store.pd.read_csv")
def test_load(mock_read_csv: MagicMock, temp_dir: Path):
    # Arrange
    foo_data = MagicMock()
    bar_data = MagicMock()
    touch(temp_dir / "foo.csv")
    touch(temp_dir / "bar.csv")
    mock_read_csv.side_effect = (foo_data, bar_data)
    csv_dir = CsvDirStore(temp_dir, bla=True)

    # Act
    csv_data = csv_dir.load()

    # Assert
    mock_read_csv.assert_not_called()  # The csv data is not yet loaded
    assert csv_data["foo"] == foo_data
    assert csv_data["bar"] == bar_data
    mock_read_csv.assert_any_call(filepath_or_buffer=temp_dir / "foo.csv", header=[0], bla=True)
    mock_read_csv.assert_any_call(filepath_or_buffer=temp_dir / "bar.csv", header=[0], bla=True)


@patch("power_grid_model_io.data_stores.csv_dir_store.pd.DataFrame.to_csv")
def test_save(mock_to_csv: MagicMock, temp_dir):
    # Arrange
    foo_data = pd.DataFrame()
    bar_data = pd.DataFrame()
    data = TabularData(foo=foo_data, bar=bar_data)
    csv_dir = CsvDirStore(temp_dir, bla=True)

    # Act
    csv_dir.save(data)

    # Assert
    mock_to_csv.assert_any_call(path_or_buf=temp_dir / "foo.csv", bla=True)
    mock_to_csv.assert_any_call(path_or_buf=temp_dir / "bar.csv", bla=True)
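
The unit tests above mock the pandas I/O calls; a quick manual round-trip sketch with real files (the temporary directory and table name are assumptions):

import tempfile
from pathlib import Path

import pandas as pd

from power_grid_model_io.data_stores.csv_dir_store import CsvDirStore
from power_grid_model_io.data_types import TabularData

with tempfile.TemporaryDirectory() as tmp:
    # Any csv_kwargs given here are passed to both read_csv and to_csv
    store = CsvDirStore(Path(tmp))
    store.save(TabularData(node=pd.DataFrame({"id": [1, 2]})))

    reloaded = store.load()
    print(reloaded["node"])  # node.csv is read here, on first access
    # Note: to_csv writes the DataFrame index by default, so it shows up as an extra column on reload.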
