From e926849d6a551d3c4469d9acad63c8edb00ac2b8 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 2 Mar 2023 13:43:38 +0100 Subject: [PATCH 1/2] Simplify lazy loader Signed-off-by: Bram Stoeller --- .../data_stores/excel_file_store.py | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/src/power_grid_model_io/data_stores/excel_file_store.py b/src/power_grid_model_io/data_stores/excel_file_store.py index 1100d4ff..47f3e82a 100644 --- a/src/power_grid_model_io/data_stores/excel_file_store.py +++ b/src/power_grid_model_io/data_stores/excel_file_store.py @@ -35,7 +35,6 @@ def __init__(self, file_path: Optional[Path] = None, **extra_paths: Path): # Create a dictionary of all supplied file paths: # {"": file_path, extra_name[0]: extra_path[0], extra_name[1]: extra_path[1], ...} self._file_paths: Dict[str, Path] = {} - self._excel_files: Dict[str, pd.ExcelFile] = {} if file_path is not None: self._file_paths[""] = file_path for name, path in extra_paths.items(): @@ -64,11 +63,21 @@ def load(self) -> TabularData: have no prefix, while the tables of all the extra files will be prefixed with the name of the key word argument as supplied in the constructor. """ + + def lazy_sheet_loader(xls_file: pd.ExcelFile, xls_sheet_name: str): + def sheet_loader(): + sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows) + sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data) + sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name) + return sheet_data + + return sheet_loader + data: Dict[str, LazyDataFrame] = {} for name, path in self._file_paths.items(): - self._excel_files[name] = pd.ExcelFile(path) - for sheet_name in self._excel_files[name].sheet_names: - loader = self._load_sheet_wrapper(name, sheet_name) + excel_file = pd.ExcelFile(path) + for sheet_name in excel_file.sheet_names: + loader = lazy_sheet_loader(excel_file, sheet_name) if name: sheet_name = f"{name}.{sheet_name}" if sheet_name in data: @@ -76,25 +85,6 @@ def load(self) -> TabularData: data[sheet_name] = loader return TabularData(**data) - def _load_sheet_wrapper(self, name: str, sheet_name: str): - """ - Load a single Excel sheet as a Pandas DataFrame. - - Args: - name: the name of the file (empty string for the main sheet) - sheet_name: the name of the sheet - - Returns: The contents the specified Excel sheet. - """ - - def wrapper(): - sheet_data = self._excel_files[name].parse(sheet_name, header=self._header_rows) - sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data) - sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=sheet_name) - return sheet_data - - return wrapper - def save(self, data: TabularData) -> None: """ Store tabular data as one or more Excel file. From dbce5e06f39bb42ebdd0a6d0e605b57b8b41cc39 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 2 Mar 2023 13:44:58 +0100 Subject: [PATCH 2/2] Simplify lazy loader Signed-off-by: Bram Stoeller --- src/power_grid_model_io/data_stores/excel_file_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/power_grid_model_io/data_stores/excel_file_store.py b/src/power_grid_model_io/data_stores/excel_file_store.py index 47f3e82a..58adfe2a 100644 --- a/src/power_grid_model_io/data_stores/excel_file_store.py +++ b/src/power_grid_model_io/data_stores/excel_file_store.py @@ -78,7 +78,7 @@ def sheet_loader(): excel_file = pd.ExcelFile(path) for sheet_name in excel_file.sheet_names: loader = lazy_sheet_loader(excel_file, sheet_name) - if name: + if name != "": # If the Excel file is not the main file, prefix the sheet name with the file name sheet_name = f"{name}.{sheet_name}" if sheet_name in data: raise ValueError(f"Duplicate sheet name '{sheet_name}'")