Skip to content

Commit

Permalink
Lazy loading CSV files (or tables in general)
Browse files (browse the repository at this point in the history)
Signed-off-by: Bram Stoeller <[email protected]>
  • Loading branch information
bramstoeller committed Mar 2, 2023
1 parent 574cf6f commit d2c1530
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions src/power_grid_model_io/data_stores/csv_dir_store.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
"""

from pathlib import Path
from typing import Any, Dict, List
from typing import Any, Callable, Dict, List

import pandas as pd

@@ -26,17 +26,24 @@ class CsvDirStore(BaseDataStore[TabularData]):

def __init__(self, dir_path: Path, **csv_kwargs):
    """
    Create a store that reads the CSV files found in a single directory.

    Args:
        dir_path: Directory containing the CSV files. The value is wrapped in Path(), so anything
            Path() accepts can be passed.
        **csv_kwargs: Extra keyword arguments that are passed on to pandas.read_csv().
    """
    super().__init__()
    # Normalize once, so the rest of the class can rely on the pathlib API (e.g. Path.glob()).
    self._dir_path = Path(dir_path)
    self._csv_kwargs: Dict[str, Any] = csv_kwargs
    # Row indices handed to pandas.read_csv() as its `header` argument.
    self._header_rows: List[int] = [0]

def load(self) -> TabularData:
    """
    Create a lazy loader for all CSV files in a directory and store them in a TabularData instance.

    No CSV file is read by this method itself. Every "*.csv" file in the directory is registered under
    its file name without the extension, mapped to a zero-argument callable; the file is only read
    (with pandas.read_csv()) when that callable is called.

    Returns:
        A TabularData instance constructed from the table-name -> loader mapping.
    """

    def lazy_csv_loader(csv_path: Path) -> Callable[[], pd.DataFrame]:
        # A factory function gives each loader its own `csv_path` binding. Defining the closure
        # directly inside the loop would make every loader see the last path (late binding).
        def csv_loader() -> pd.DataFrame:
            return pd.read_csv(filepath_or_buffer=csv_path, header=self._header_rows, **self._csv_kwargs)

        return csv_loader

    data: Dict[str, Callable[[], pd.DataFrame]] = {
        path.stem: lazy_csv_loader(path) for path in self._dir_path.glob("*.csv")
    }

    return TabularData(**data)

Expand Down

0 comments on commit d2c1530

Please sign in to comment.