From c48584edc45ac6f78c33739d71065742516d2b45 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Mon, 13 Feb 2023 18:18:10 -0700 Subject: [PATCH 01/26] Remove plant_name_eia in pudl.transform.eia923.fuel_receipts_costs() --- src/pudl/transform/eia923.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 4247dbf2e2..8f9bb6509a 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -1072,7 +1072,6 @@ def fuel_receipts_costs(eia923_dfs, eia923_transformed_dfs): # Drop fields we're not inserting into the fuel_receipts_costs_eia923 # table. cols_to_drop = [ - "plant_name_eia", "plant_state", "operator_name", "operator_id", From 13a0dc110e3d780481491ec41f09edb77d59749b Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Thu, 16 Feb 2023 18:48:28 -0700 Subject: [PATCH 02/26] Don't discard plant_name_eia from pudl.transform.eia923.generation_fuel() --- src/pudl/transform/eia923.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 8f9bb6509a..bb849371b6 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -617,7 +617,6 @@ def generation_fuel(eia923_dfs, eia923_transformed_dfs): # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ "combined_heat_power", - "plant_name_eia", "operator_name", "operator_id", "plant_state", From b4a3c28f7816d67a609316f82b25ab3fd13e080c Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Fri, 24 Feb 2023 19:50:14 -0700 Subject: [PATCH 03/26] Don't drop operator_name/utility_name_eia in pudl.transform.eia923.generation_fuel() --- .../eia923/column_maps/generation_fuel.csv | 2 +- src/pudl/transform/eia.py | 13 +++++++++++++ src/pudl/transform/eia923.py | 1 - 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv index 4def3915bf..c518ef25b4 100644 --- a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv @@ -3,7 +3,7 @@ plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plan combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant nuclear_unit_id,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,state,state,state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index b1c9306bfa..62c0dca5d6 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -547,6 +547,19 @@ def harvesting( # noqa: C901 entity_df = _add_additional_epacems_plants(entity_df) entity_df = _add_timezone(entity_df) + # Remove fields that came from input data but aren't in the + # corresponding SQLite tables. The data may still exist but has been + # moved elsewhere. + if entity == "utilities": + eia_transformed_dfs["generation_fuel_eia923"] = ( + eia_transformed_dfs["generation_fuel_eia923"]. + drop(columns=["utility_name_eia"]) + ) + eia_transformed_dfs["generation_fuel_nuclear_eia923"] = ( + eia_transformed_dfs["generation_fuel_nuclear_eia923"]. + drop(columns=["utility_name_eia"]) + ) + eia_transformed_dfs[f"{entity}_annual_eia"] = annual_df entities_dfs[f"{entity}_entity_eia"] = entity_df if debug: diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index bb849371b6..18e29f0e7e 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -617,7 +617,6 @@ def generation_fuel(eia923_dfs, eia923_transformed_dfs): # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ "combined_heat_power", - "operator_name", "operator_id", "plant_state", "census_region", From 35b6c60c5d369d0be630eed550104b3563e0d00a Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Sun, 26 Feb 2023 21:03:20 -0700 Subject: [PATCH 04/26] Move newly added code to what seems like a more sensible location --- src/pudl/transform/eia.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 62c0dca5d6..e5e1d6caa7 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -547,19 +547,6 @@ def harvesting( # noqa: C901 entity_df = _add_additional_epacems_plants(entity_df) entity_df = _add_timezone(entity_df) - # Remove fields that came from input data but aren't in the - # corresponding SQLite tables. The data may still exist but has been - # moved elsewhere. - if entity == "utilities": - eia_transformed_dfs["generation_fuel_eia923"] = ( - eia_transformed_dfs["generation_fuel_eia923"]. - drop(columns=["utility_name_eia"]) - ) - eia_transformed_dfs["generation_fuel_nuclear_eia923"] = ( - eia_transformed_dfs["generation_fuel_nuclear_eia923"]. - drop(columns=["utility_name_eia"]) - ) - eia_transformed_dfs[f"{entity}_annual_eia"] = annual_df entities_dfs[f"{entity}_entity_eia"] = entity_df if debug: @@ -1183,6 +1170,18 @@ def transform( "boilers_annual_eia", ) + # Remove fields that came from input data but aren't in the + # corresponding SQLite tables. The data may still exist but has been + # moved elsewhere. + eia_transformed_dfs["generation_fuel_eia923"] = ( + eia_transformed_dfs["generation_fuel_eia923"]. + drop(columns=["utility_name_eia"]) + ) + eia_transformed_dfs["generation_fuel_nuclear_eia923"] = ( + eia_transformed_dfs["generation_fuel_nuclear_eia923"]. + drop(columns=["utility_name_eia"]) + ) + eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( df=eia_transformed_dfs["plants_eia860"] ).pipe( From d6021ac6ec9755437f9ae94c23cbdc2eea488f71 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Feb 2023 04:30:33 +0000 Subject: [PATCH 05/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/eia.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index e5e1d6caa7..23a4c4b98d 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1173,14 +1173,12 @@ def transform( # Remove fields that came from input data but aren't in the # corresponding SQLite tables. The data may still exist but has been # moved elsewhere. - eia_transformed_dfs["generation_fuel_eia923"] = ( - eia_transformed_dfs["generation_fuel_eia923"]. - drop(columns=["utility_name_eia"]) - ) - eia_transformed_dfs["generation_fuel_nuclear_eia923"] = ( - eia_transformed_dfs["generation_fuel_nuclear_eia923"]. - drop(columns=["utility_name_eia"]) - ) + eia_transformed_dfs["generation_fuel_eia923"] = eia_transformed_dfs[ + "generation_fuel_eia923" + ].drop(columns=["utility_name_eia"]) + eia_transformed_dfs["generation_fuel_nuclear_eia923"] = eia_transformed_dfs[ + "generation_fuel_nuclear_eia923" + ].drop(columns=["utility_name_eia"]) eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( df=eia_transformed_dfs["plants_eia860"] From adc4503d134fca1761aaff9af20250f1192deb5e Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Mon, 27 Feb 2023 18:25:02 -0700 Subject: [PATCH 06/26] Extract the core of AbstractTableTransformer.enforce_schema() into Resource, so it can be used from elsewhere --- src/pudl/metadata/classes.py | 16 ++++++++++++++++ src/pudl/transform/classes.py | 14 +------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index a1b445a4bb..2b1c662d47 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -1676,6 +1676,22 @@ def encode(self, df: pd.DataFrame) -> pd.DataFrame: ) return df + def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame: + """Drop columns not in the DB schema and enforce specified types.""" + expected_cols = pd.Index(self.get_field_names()) + missing_cols = list(expected_cols.difference(df.columns)) + if missing_cols: + raise ValueError( + f"{self.name}: Missing columns found when enforcing table " + f"schema: {missing_cols}" + ) + df = self.format_df(df) + pk = self.schema.primary_key + if pk and not df[df.duplicated(subset=pk)].empty: + raise ValueError( + f"{self.name} Duplicate primary keys when enforcing schema." + ) + return df # ---- Package ---- # diff --git a/src/pudl/transform/classes.py b/src/pudl/transform/classes.py index 8123b61300..ee756912a3 100644 --- a/src/pudl/transform/classes.py +++ b/src/pudl/transform/classes.py @@ -1263,17 +1263,5 @@ def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame: """Drop columns not in the DB schema and enforce specified types.""" logger.info(f"{self.table_id.value}: Enforcing database schema on dataframe.") resource = Package.from_resource_ids().get_resource(self.table_id.value) - expected_cols = pd.Index(resource.get_field_names()) - missing_cols = list(expected_cols.difference(df.columns)) - if missing_cols: - raise ValueError( - f"{self.table_id.value}: Missing columns found when enforcing table " - f"schema: {missing_cols}" - ) - df = resource.format_df(df) - pk = resource.schema.primary_key - if pk and not df[df.duplicated(subset=pk)].empty: - raise ValueError( - f"{self.table_id.value} Duplicate primary keys when enforcing schema." - ) + df = resource.enforce_schema(df) return df From c44949b9203b71d060bd1910d4e1c04f4be1cc18 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Feb 2023 01:27:20 +0000 Subject: [PATCH 07/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/metadata/classes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 2b1c662d47..baf25beb1d 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -1693,6 +1693,7 @@ def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame: ) return df + # ---- Package ---- # From 731baef0049e1b47a847dec0af82ae8012ced03b Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Tue, 28 Feb 2023 17:00:40 -0700 Subject: [PATCH 08/26] Use Resource.endorse_schema() to clean up non-matching fields in DataFrames before converting to SQLite --- src/pudl/transform/eia.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 23a4c4b98d..3328c8aff6 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1173,12 +1173,13 @@ def transform( # Remove fields that came from input data but aren't in the # corresponding SQLite tables. The data may still exist but has been # moved elsewhere. - eia_transformed_dfs["generation_fuel_eia923"] = eia_transformed_dfs[ - "generation_fuel_eia923" - ].drop(columns=["utility_name_eia"]) - eia_transformed_dfs["generation_fuel_nuclear_eia923"] = eia_transformed_dfs[ - "generation_fuel_nuclear_eia923" - ].drop(columns=["utility_name_eia"]) + for cat in eia_transformed_dfs: + resource = ( + pudl.metadata.classes.Package.from_resource_ids(). + get_resource(cat) + ) + eia_transformed_dfs[cat] = resource.enforce_schema( + eia_transformed_dfs[cat]) eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( df=eia_transformed_dfs["plants_eia860"] From fbe7b21b82639432d4db2bed91a1a3d4e33d86e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 00:02:03 +0000 Subject: [PATCH 09/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/eia.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 3328c8aff6..015f1aca87 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1174,12 +1174,8 @@ def transform( # corresponding SQLite tables. The data may still exist but has been # moved elsewhere. for cat in eia_transformed_dfs: - resource = ( - pudl.metadata.classes.Package.from_resource_ids(). - get_resource(cat) - ) - eia_transformed_dfs[cat] = resource.enforce_schema( - eia_transformed_dfs[cat]) + resource = pudl.metadata.classes.Package.from_resource_ids().get_resource(cat) + eia_transformed_dfs[cat] = resource.enforce_schema(eia_transformed_dfs[cat]) eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( df=eia_transformed_dfs["plants_eia860"] From e2768f9c707010fc29644037f57a57866140c47d Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Tue, 28 Feb 2023 21:12:10 -0700 Subject: [PATCH 10/26] Move DataFrame cleaning to the end of transform() --- src/pudl/transform/eia.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 3328c8aff6..68e26d3901 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1170,6 +1170,13 @@ def transform( "boilers_annual_eia", ) + eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( + df=eia_transformed_dfs["plants_eia860"] + ).pipe( + fix_balancing_authority_codes_with_state, + plants_entity=entities_dfs["plants_entity_eia"], + ) + # Remove fields that came from input data but aren't in the # corresponding SQLite tables. The data may still exist but has been # moved elsewhere. @@ -1181,10 +1188,4 @@ def transform( eia_transformed_dfs[cat] = resource.enforce_schema( eia_transformed_dfs[cat]) - eia_transformed_dfs["plants_eia860"] = fillna_balancing_authority_codes_via_names( - df=eia_transformed_dfs["plants_eia860"] - ).pipe( - fix_balancing_authority_codes_with_state, - plants_entity=entities_dfs["plants_entity_eia"], - ) return entities_dfs, eia_transformed_dfs From 508ba29ec309ae797aa5366b5c14034f54f78254 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Thu, 2 Mar 2023 14:11:55 -0700 Subject: [PATCH 11/26] Call enforce_schema() on entities DataFrames as well --- src/pudl/transform/eia.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 0e3b312f10..63a0eb7bc7 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1183,5 +1183,8 @@ def transform( for cat in eia_transformed_dfs: resource = pudl.metadata.classes.Package.from_resource_ids().get_resource(cat) eia_transformed_dfs[cat] = resource.enforce_schema(eia_transformed_dfs[cat]) + for cat in entities_dfs: + resource = pudl.metadata.classes.Package.from_resource_ids().get_resource(cat) + entities_dfs[cat] = resource.enforce_schema(entities_dfs[cat]) return entities_dfs, eia_transformed_dfs From dc5068e7fc48bf434e52872f0e2929d762da08b9 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Fri, 17 Mar 2023 16:06:56 -0600 Subject: [PATCH 12/26] Don't drop combined_heat_power in boiler_fuel() --- src/pudl/package_data/eia923/column_maps/boiler_fuel.csv | 2 +- src/pudl/transform/eia923.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv index 6f8ba8f4e9..4907a33b7a 100644 --- a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv @@ -1,6 +1,6 @@ year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant +associated_combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 18e29f0e7e..3c04c0d798 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -760,6 +760,7 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da + relative_cols + key_cols + ["prime_mover_code", "sector_id_eia", "sector_name_eia"] + + ["associated_combined_heat_power"] ) actual_cols = set(boiler_fuel_df.columns) difference = actual_cols.symmetric_difference(expected_cols) @@ -830,7 +831,6 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs): # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ - "combined_heat_power", "plant_name_eia", "operator_name", "operator_id", From fdada6100da98bdbcc91c6412718b5d2e31899ef Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Mon, 20 Mar 2023 09:31:48 -0600 Subject: [PATCH 13/26] One more incremental change in boiler_fuel() --- src/pudl/transform/eia923.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 3c04c0d798..52d37ffb24 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -752,15 +752,19 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da quantity_cols = [ "fuel_consumed_units", ] - relative_cols = ["ash_content_pct", "sulfur_content_pct", "fuel_mmbtu_per_unit"] - key_cols = ["boiler_id", "energy_source_code", "plant_id_eia", "report_date"] + relative_cols = [ + "ash_content_pct", "sulfur_content_pct", "fuel_mmbtu_per_unit" + ] + key_cols = [ + "boiler_id", "energy_source_code", "plant_id_eia", "report_date" + ] + other_cols = [ + "prime_mover_code", "sector_id_eia", "sector_name_eia", + "associated_combined_heat_power", "plant_name_eia" + ] expected_cols = set( - quantity_cols - + relative_cols - + key_cols - + ["prime_mover_code", "sector_id_eia", "sector_name_eia"] - + ["associated_combined_heat_power"] + quantity_cols + relative_cols + key_cols + other_cols ) actual_cols = set(boiler_fuel_df.columns) difference = actual_cols.symmetric_difference(expected_cols) @@ -831,7 +835,6 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs): # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ - "plant_name_eia", "operator_name", "operator_id", "plant_state", From 40ef5f491ae108d639c0f27cc4b6f06b84e07deb Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Mon, 20 Mar 2023 11:10:03 -0600 Subject: [PATCH 14/26] Several more fields not dropped --- devtools/eia-etl-debug.ipynb | 3636 +++++++++++++++++++++++++++++++++- src/pudl/transform/eia923.py | 7 +- 2 files changed, 3618 insertions(+), 25 deletions(-) diff --git a/devtools/eia-etl-debug.ipynb b/devtools/eia-etl-debug.ipynb index 9ca2d83f74..526c778021 100644 --- a/devtools/eia-etl-debug.ipynb +++ b/devtools/eia-etl-debug.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -66,8 +66,8 @@ "eia860_data_source = DataSource.from_id(\"eia860\")\n", "eia860_settings = Eia860Settings(\n", "# Limit the years as needed if you're testing only a few of them. E.g.:\n", - " years=[2021],\n", - "# years=eia860_data_source.working_partitions[\"years\"]\n", + "# years=[2021],\n", + " years=eia860_data_source.working_partitions[\"years\"]\n", "# By default all of the tables will be processed.\n", "# Select the relevant tables as needed if you're testing only a few of them. E.g.:\n", "# tables=[\"generation_fuel_nuclear_eia923\", \"generation_fuel_eia923\"]\n", @@ -76,8 +76,8 @@ "eia923_data_source = DataSource.from_id(\"eia923\")\n", "eia923_settings = Eia923Settings(\n", "# Limit the years as needed if you're testing only a few of them. E.g.:\n", - " years = [2021]\n", - " # years = eia923_data_source.working_partitions[\"years\"]\n", + "# years = [2021]\n", + " years = eia923_data_source.working_partitions[\"years\"]\n", "# By default all of the tables will be processed.\n", "# Select the relevant tables as needed if you're testing only a few of them. E.g.:\n", "# tables=[\"generation_fuel_nuclear_eia923\", \"generation_fuel_eia923\"]\n", @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -119,9 +119,384 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:09:03 [ INFO] catalystcoop.pudl.extract.excel:237 Extracting eia860 spreadsheet data.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting eia860 spreadsheet data.\n", + "boiler_cool\n", + "boiler_fgd\n", + "boiler_fgp\n", + "boiler_gen\n", + "boiler_stackflue\n", + "Boiler Cooler\n", + "Boiler FGD\n", + "Boiler FGP\n", + "Boiler Generator\n", + "Boiler Stack Flue\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "Boiler Cooling\n", + "Boiler Generator\n", + "Boiler Mercury\n", + "Boiler NOx\n", + "Boiler Particulate Matter\n", + "Boiler SO2\n", + "Boiler Stack Flue\n", + "Emissions Control Equipment\n", + "boiler\n", + "controls\n", + "cooling\n", + "fgd\n", + "fgp\n", + "stack_flue\n", + "Boiler\n", + "Controls\n", + "Cooling\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n", + "Boiler Info & Design Parameters\n", + "Cooling\n", + "Emission Standards & Strategies\n", + "FGD\n", + "FGP\n", + "Stack Flue\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "operable\n", + "proposed\n", + "retired & canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "operable\n", + "proposed\n", + "retired\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n", + "Operable\n", + "Proposed\n", + "Retired and Canceled\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ownership2011\n", + "ownership2012\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n", + "Ownership\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "plant2011\n", + "plant2012\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n", + "Plant\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n", + "/home/knordback/Kurt/pudl/src/pudl/helpers.py:1556: FutureWarning: save is not part of the public API, usage can give unexpected results and will be removed in a future version\n", + " writer.save()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "utility2011\n", + "utility2012\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n", + "Utility\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:18:27 [ INFO] catalystcoop.pudl.extract.excel:237 Extracting eia860m spreadsheet data.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting eia860m spreadsheet data.\n", + "Operating\n", + "Canceled or Postponed\n", + "Operating_PR\n", + "Planned\n", + "Planned_PR\n", + "Retired\n", + "Retired_PR\n", + "CPU times: user 9min 40s, sys: 1.52 s, total: 9min 42s\n", + "Wall time: 9min 42s\n" + ] + } + ], "source": [ "%%time\n", "eia860_extractor = pudl.extract.eia860.Extractor(ds)\n", @@ -147,9 +522,1188 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:18:46 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for ownership_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for ownership_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:19:01 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding ownership_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding ownership_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:19:01 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for generators_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for generators_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.operational_status_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.operational_status_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.prime_mover_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_5\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.planned_new_prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.planned_new_prime_mover_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.planned_energy_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.planned_energy_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:20:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:00 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:00 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:01 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for plants_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for plants_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.balancing_authority_code_eia\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.balancing_authority_code_eia\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.sector_id_eia\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.sector_id_eia\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:13 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for boilers_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for boilers_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_status\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_status\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_type\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_type\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.wet_dry_bottom\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.wet_dry_bottom\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_particulate\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_particulate\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_so2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_so2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_nox\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_nox\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_particulate\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_particulate\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_so2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_so2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_nox\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_nox\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_5\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_manufacturer_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_manufacturer_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_status_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_status_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_mercury\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_mercury\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:57 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for boiler_generator_assn_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for boiler_generator_assn_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boiler_generator_assn_eia860.boiler_generator_assn_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boiler_generator_assn_eia860.boiler_generator_assn_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boiler_generator_assn_eia860.steam_plant_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boiler_generator_assn_eia860.steam_plant_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boiler_generator_assn_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boiler_generator_assn_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:21:58 [ INFO] catalystcoop.pudl.transform.eia860:817 Transforming raw EIA 860 DataFrames for utilities_eia860 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 860 DataFrames for utilities_eia860 concatenated across all years.\n", + "CPU times: user 2min 55s, sys: 20.2 s, total: 3min 15s\n", + "Wall time: 3min 15s\n" + ] + } + ], "source": [ "%%time\n", "eia860_transformed_dfs = pudl.transform.eia860.transform(\n", @@ -174,9 +1728,141 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:22:02 [ INFO] catalystcoop.pudl.extract.excel:237 Extracting eia923 spreadsheet data.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting eia923 spreadsheet data.\n", + "Page 2 Oil Stocks Data\n", + "Page 1 Generation and Fuel Data\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 2 Oil Stocks Data\n", + "Page 1 Generation and Fuel Data\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 2 Oil Stocks Data\n", + "Page 1 Generation and Fuel Data\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Energy Storage\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Energy Storage\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Energy Storage\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 1 Generation and Fuel Data\n", + "Page 2 Coal Stocks Data\n", + "Page 1 Energy Storage\n", + "Page 1 Puerto Rico\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 6 Plant Frame Puerto Rico\n", + "Page 1 Generation and Fuel Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 1 Energy Storage\n", + "Page 1 Puerto Rico\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 6 Plant Frame\n", + "Page 6 Plant Frame Puerto Rico\n", + "Page 1 Energy Storage\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Puerto Rico\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 6 Plant Frame Puerto Rico\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Energy Storage\n", + "Page 1 Puerto Rico\n", + "Page 2 Coal Stocks Data\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Petcoke Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 6 Plant Frame Puerto Rico\n", + "Page 1 Energy Storage\n", + "Page 1 Generation and Fuel Data\n", + "Page 1 Puerto Rico\n", + "Page 2 Oil Stocks Data\n", + "Page 2 Stocks Data\n", + "Page 3 Boiler Fuel Data\n", + "Page 4 Generator Data\n", + "Page 5 Fuel Receipts and Costs\n", + "Page 6 Plant Frame\n", + "Page 6 Plant Frame Puerto Rico\n", + "CPU times: user 8min 22s, sys: 1.49 s, total: 8min 23s\n", + "Wall time: 8min 24s\n" + ] + } + ], "source": [ "%%time\n", "eia923_extractor = pudl.extract.eia923.Extractor(ds)\n", @@ -192,9 +1878,348 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:30:26 [ INFO] catalystcoop.pudl.transform.eia923:1216 Transforming raw EIA 923 DataFrames for generation_fuel_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 923 DataFrames for generation_fuel_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:31:44 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generation_fuel_eia923.energy_source_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generation_fuel_eia923.energy_source_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:31:44 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generation_fuel_eia923.fuel_type_code_aer\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generation_fuel_eia923.fuel_type_code_aer\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:31:45 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generation_fuel_eia923.prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generation_fuel_eia923.prime_mover_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:31:46 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generation_fuel_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generation_fuel_eia923.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:31:58 [ INFO] catalystcoop.pudl.transform.eia923:1216 Transforming raw EIA 923 DataFrames for boiler_fuel_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 923 DataFrames for boiler_fuel_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:31 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boiler_fuel_eia923.energy_source_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boiler_fuel_eia923.energy_source_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:33 [ INFO] catalystcoop.pudl.transform.eia923:1216 Transforming raw EIA 923 DataFrames for generation_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 923 DataFrames for generation_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:41 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generation_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generation_eia923.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:41 [ INFO] catalystcoop.pudl.transform.eia923:1216 Transforming raw EIA 923 DataFrames for coalmine_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 923 DataFrames for coalmine_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:55 [ INFO] catalystcoop.pudl.helpers:203 Assigned state FIPS codes for 35.74% of records.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assigned state FIPS codes for 35.74% of records.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:32:57 [ INFO] catalystcoop.pudl.transform.eia923:1216 Transforming raw EIA 923 DataFrames for fuel_receipts_costs_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transforming raw EIA 923 DataFrames for fuel_receipts_costs_eia923 concatenated across all years.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:10 [ INFO] catalystcoop.pudl.helpers:203 Assigned state FIPS codes for 35.74% of records.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assigned state FIPS codes for 35.74% of records.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:10 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.mine_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:10 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding coalmine_eia923.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:33 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding fuel_receipts_costs_eia923.contract_type_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding fuel_receipts_costs_eia923.contract_type_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:33 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding fuel_receipts_costs_eia923.energy_source_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding fuel_receipts_costs_eia923.energy_source_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:34 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding fuel_receipts_costs_eia923.primary_transportation_mode_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding fuel_receipts_costs_eia923.primary_transportation_mode_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:34 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding fuel_receipts_costs_eia923.secondary_transportation_mode_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding fuel_receipts_costs_eia923.secondary_transportation_mode_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:33:34 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding fuel_receipts_costs_eia923.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding fuel_receipts_costs_eia923.data_maturity\n", + "CPU times: user 3min 1s, sys: 5.79 s, total: 3min 7s\n", + "Wall time: 3min 7s\n" + ] + } + ], "source": [ "%%time\n", "eia923_transformed_dfs = pudl.transform.eia923.transform(\n", @@ -219,9 +2244,1552 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:34:43 [ INFO] catalystcoop.pudl.transform.eia:1180 Harvesting IDs & consistently static attributes for EIA plants\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Harvesting IDs & consistently static attributes for EIA plants\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:35:02 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.balancing_authority_code_eia\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.balancing_authority_code_eia\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:35:11 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.reporting_frequency_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.reporting_frequency_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:35:16 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.sector_id_eia\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.sector_id_eia\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:35:18 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding plants_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding plants_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:34 [ INFO] catalystcoop.pudl.transform.eia:543 Average consistency of static plants values is 99.49%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average consistency of static plants values is 99.49%\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:36 [ INFO] catalystcoop.pudl.transform.eia:1180 Harvesting IDs & consistently static attributes for EIA generators\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Harvesting IDs & consistently static attributes for EIA generators\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:40 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.operational_status_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.operational_status_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:40 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.prime_mover_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:41 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:43 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:46 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:48 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:51 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_5\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_code_6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_code_6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:36:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:01 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_1_transport_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_1_transport_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:04 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:06 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:08 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.energy_source_2_transport_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.energy_source_2_transport_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:11 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.planned_new_prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.planned_new_prime_mover_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:12 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.planned_energy_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.planned_energy_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:15 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:18 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:20 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:23 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.startup_source_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.startup_source_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:37:26 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding generators_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding generators_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:05 [ INFO] catalystcoop.pudl.transform.eia:543 Average consistency of static generators values is 99.46%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average consistency of static generators values is 99.46%\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:05 [ INFO] catalystcoop.pudl.transform.eia:1180 Harvesting IDs & consistently static attributes for EIA boilers\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Harvesting IDs & consistently static attributes for EIA boilers\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:14 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_status\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_status\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:16 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_type\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_type\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:17 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:19 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:21 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.firing_type_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.firing_type_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:23 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:26 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:29 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:33 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.boiler_fuel_code_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.boiler_fuel_code_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:37 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.wet_dry_bottom\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.wet_dry_bottom\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:38 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_particulate\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_particulate\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:41 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_so2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_so2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:44 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_nox\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_nox\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:46 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_particulate\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_particulate\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:48 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_so2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_so2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:50 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.unit_nox\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.unit_nox\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:52 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:53 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.particulate_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:54 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:55 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:56 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:57 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:58 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.so2_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:40:59 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:00 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_out_of_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:01 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:01 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:02 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_existing_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:02 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:03 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:04 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_planned_caaa_compliance_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:04 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:06 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:08 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:10 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:11 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_5\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_existing_strategy_6\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_existing_strategy_6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:15 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:17 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:19 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.mercury_control_proposed_strategy_3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.mercury_control_proposed_strategy_3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:21 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_manufacturer_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_manufacturer_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:21 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.nox_control_status_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.nox_control_status_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:22 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.regulation_mercury\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.regulation_mercury\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:41:25 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:42:18 [ INFO] catalystcoop.pudl.transform.eia:543 Average consistency of static boilers values is 99.99%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average consistency of static boilers values is 99.99%\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:42:19 [ INFO] catalystcoop.pudl.transform.eia:1180 Harvesting IDs & consistently static attributes for EIA utilities\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Harvesting IDs & consistently static attributes for EIA utilities\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:42:21 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding utilities_eia860.data_maturity\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding utilities_eia860.data_maturity\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:42:34 [ INFO] catalystcoop.pudl.transform.eia:543 Average consistency of static utilities values is 99.98%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average consistency of static utilities values is 99.98%\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:42:34 [ INFO] catalystcoop.pudl.transform.eia:619 Inferring complete EIA boiler-generator associations.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inferring complete EIA boiler-generator associations.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=1004, unit_id_pudl=3, unit_id_eia=['G108' '1' 'CT1']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=1004, unit_id_pudl=3, unit_id_eia=['G108' '1' 'CT1']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=1904, unit_id_pudl=1, unit_id_eia=['HBR0' 'BDS0']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=1904, unit_id_pudl=1, unit_id_eia=['HBR0' 'BDS0']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=1927, unit_id_pudl=2, unit_id_eia=['HBR0' 'RIV0']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=1927, unit_id_pudl=2, unit_id_eia=['HBR0' 'RIV0']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=4040, unit_id_pudl=1, unit_id_eia=['PWG1' 'PWG2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=4040, unit_id_pudl=1, unit_id_eia=['PWG1' 'PWG2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=7242, unit_id_pudl=1, unit_id_eia=['CC1' 'CC2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=7242, unit_id_pudl=1, unit_id_eia=['CC1' 'CC2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=7757, unit_id_pudl=1, unit_id_eia=['CC1' 'CC2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=7757, unit_id_pudl=1, unit_id_eia=['CC1' 'CC2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=10725, unit_id_pudl=1, unit_id_eia=['F801' 'F802']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=10725, unit_id_pudl=1, unit_id_eia=['F801' 'F802']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=50973, unit_id_pudl=1, unit_id_eia=['BLK1' 'BLK2' 'BLK3']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=50973, unit_id_pudl=1, unit_id_eia=['BLK1' 'BLK2' 'BLK3']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=55153, unit_id_pudl=1, unit_id_eia=['STG1' 'STG2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=55153, unit_id_pudl=1, unit_id_eia=['STG1' 'STG2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=55309, unit_id_pudl=1, unit_id_eia=['SMR2' 'SMR1']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=55309, unit_id_pudl=1, unit_id_eia=['SMR2' 'SMR1']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=55502, unit_id_pudl=1, unit_id_eia=['G801' 'CC1' 'CC2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=55502, unit_id_pudl=1, unit_id_eia=['G801' 'CC1' 'CC2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=55701, unit_id_pudl=1, unit_id_eia=['CC1' 'G961']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=55701, unit_id_pudl=1, unit_id_eia=['CC1' 'G961']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56041, unit_id_pudl=1, unit_id_eia=['NGS' 'MGS']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56041, unit_id_pudl=1, unit_id_eia=['NGS' 'MGS']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56309, unit_id_pudl=1, unit_id_eia=['G401' 'G402']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56309, unit_id_pudl=1, unit_id_eia=['G401' 'G402']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56350, unit_id_pudl=1, unit_id_eia=['115' 'BLK1']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56350, unit_id_pudl=1, unit_id_eia=['115' 'BLK1']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56350, unit_id_pudl=2, unit_id_eia=['116' 'BLK2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56350, unit_id_pudl=2, unit_id_eia=['116' 'BLK2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56998, unit_id_pudl=1, unit_id_eia=['43' 'PB4']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56998, unit_id_pudl=1, unit_id_eia=['43' 'PB4']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=56998, unit_id_pudl=2, unit_id_eia=['53' 'PB5']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=56998, unit_id_pudl=2, unit_id_eia=['53' 'PB5']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=57666, unit_id_pudl=1, unit_id_eia=['1' '2']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=57666, unit_id_pudl=1, unit_id_eia=['1' '2']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=57794, unit_id_pudl=1, unit_id_eia=['CC01' 'CC02']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=57794, unit_id_pudl=1, unit_id_eia=['CC01' 'CC02']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:07 [ WARNING] catalystcoop.pudl.transform.eia:924 Multiple EIA unit codes:plant_id_eia=60786, unit_id_pudl=1, unit_id_eia=['4343' '4141']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple EIA unit codes:plant_id_eia=60786, unit_id_pudl=1, unit_id_eia=['4343' '4141']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:08 [ INFO] catalystcoop.pudl.transform.eia:1065 filled 30 balancing authority codes using names.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "filled 30 balancing authority codes using names.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:08 [ INFO] catalystcoop.pudl.transform.eia:1108 Spot fixing incorrect PACW/PACE BA codes and names.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spot fixing incorrect PACW/PACE BA codes and names.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_entity_eia.boiler_manufacturer_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_entity_eia.boiler_manufacturer_code\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-20 10:43:13 [ INFO] catalystcoop.pudl.metadata.classes:1673 Recoding boilers_entity_eia.prime_mover_code\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Recoding boilers_entity_eia.prime_mover_code\n", + "CPU times: user 9min 27s, sys: 9.99 s, total: 9min 37s\n", + "Wall time: 9min 38s\n" + ] + } + ], "source": [ "%%time\n", "eia_transformed_dfs = eia923_transformed_dfs.copy()\n", @@ -256,6 +3824,34 @@ "out_dfs.update(eia_transformed_dfs)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -280,7 +3876,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 52d37ffb24..5a053ad18f 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -760,7 +760,8 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da ] other_cols = [ "prime_mover_code", "sector_id_eia", "sector_name_eia", - "associated_combined_heat_power", "plant_name_eia" + "associated_combined_heat_power", "plant_name_eia", + "plant_state", "census_region", "nerc_region", "naics_code" ] expected_cols = set( @@ -837,10 +838,6 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs): cols_to_drop = [ "operator_name", "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", "fuel_unit", "total_fuel_consumption_quantity", "balancing_authority_code_eia", From bb60591931600c1609f8a96af55ef6bb1b097ec7 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Tue, 21 Mar 2023 11:46:23 -0600 Subject: [PATCH 15/26] More un-dropping of fields --- src/pudl/package_data/eia923/column_maps/boiler_fuel.csv | 4 ++-- src/pudl/transform/eia923.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv index 4907a33b7a..cb20dd38db 100644 --- a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv @@ -2,8 +2,8 @@ year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id associated_combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index ead54d12dd..3f873580bd 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -762,8 +762,6 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs): # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ - "operator_name", - "operator_id", "fuel_unit", "total_fuel_consumption_quantity", "balancing_authority_code_eia", From 2efd1523b71717064fbb0034ade293d2e75d206f Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Tue, 16 May 2023 09:29:42 -0600 Subject: [PATCH 16/26] Remove code made obsolete in merge from dev --- src/pudl/transform/eia.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index cc778ac72f..7564091cf3 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1206,20 +1206,7 @@ def finished_eia_asset_factory( name=table_name, io_manager_key=io_manager_key, ) -""" - # Remove fields that came from input data but aren't in the - # corresponding SQLite tables. The data may still exist but has been - # moved elsewhere. - for cat in eia_transformed_dfs: - resource = pudl.metadata.classes.Package.from_resource_ids().get_resource(cat) - eia_transformed_dfs[cat] = resource.enforce_schema(eia_transformed_dfs[cat]) - for cat in entities_dfs: - resource = pudl.metadata.classes.Package.from_resource_ids().get_resource(cat) - entities_dfs[cat] = resource.enforce_schema(entities_dfs[cat]) - - return entities_dfs, eia_transformed_dfs -""" def finished_eia_asset(**kwargs) -> pd.DataFrame: """Enforce PUDL DB schema on a cleaned EIA dataframe.""" df = convert_cols_dtypes(kwargs[clean_table_name], data_source="eia") From d464394a2e997a46fd9a14441666d27b0711d28e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 May 2023 15:43:49 +0000 Subject: [PATCH 17/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/eia.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 7564091cf3..4bed06184a 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -1206,7 +1206,6 @@ def finished_eia_asset_factory( name=table_name, io_manager_key=io_manager_key, ) - def finished_eia_asset(**kwargs) -> pd.DataFrame: """Enforce PUDL DB schema on a cleaned EIA dataframe.""" df = convert_cols_dtypes(kwargs[clean_table_name], data_source="eia") From 3675ebad2585b46da6d673720b545cfd94d92d39 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Thu, 18 May 2023 12:51:19 -0600 Subject: [PATCH 18/26] Don't drop total_fuel_consumption_quantity in clean_boiler_fuel_eia923() --- src/pudl/transform/eia923.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 7e0a05270a..e6a9b4f310 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -762,7 +762,10 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da quantity_cols + relative_cols + key_cols - + ["prime_mover_code", "sector_id_eia", "sector_name_eia"] + + [ + "prime_mover_code", "sector_id_eia", "sector_name_eia", + "total_fuel_consumption_quantity", + ] ) actual_cols = set(boiler_fuel_df.columns) difference = actual_cols.symmetric_difference(expected_cols) @@ -829,7 +832,6 @@ def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFra # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ "fuel_unit", - "total_fuel_consumption_quantity", "balancing_authority_code_eia", "early_release", "reporting_frequency_code", From cb74321a15191e36d320494848b7df5f3670f731 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 May 2023 18:52:39 +0000 Subject: [PATCH 19/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/eia923.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index e6a9b4f310..20ed19fed9 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -763,7 +763,9 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da + relative_cols + key_cols + [ - "prime_mover_code", "sector_id_eia", "sector_name_eia", + "prime_mover_code", + "sector_id_eia", + "sector_name_eia", "total_fuel_consumption_quantity", ] ) From 54dd4ccdb41c4085ff3f0efe505bd706479c6d81 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Fri, 19 May 2023 07:03:23 -0600 Subject: [PATCH 20/26] More field non-dropping --- src/pudl/metadata/classes.py | 18 ------------------ src/pudl/transform/eia923.py | 5 ++--- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 2aa957428d..2d579af480 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -1697,24 +1697,6 @@ def encode(self, df: pd.DataFrame) -> pd.DataFrame: ) return df - def enforce_schema(self, df: pd.DataFrame) -> pd.DataFrame: - """Drop columns not in the DB schema and enforce specified types.""" - expected_cols = pd.Index(self.get_field_names()) - missing_cols = list(expected_cols.difference(df.columns)) - if missing_cols: - raise ValueError( - f"{self.name}: Missing columns found when enforcing table " - f"schema: {missing_cols}" - ) - df = self.format_df(df) - pk = self.schema.primary_key - if pk and not df[df.duplicated(subset=pk)].empty: - raise ValueError( - f"{self.name} Duplicate primary keys when enforcing schema." - ) - return df - - # ---- Package ---- # diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 20ed19fed9..5b97700609 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -767,6 +767,8 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da "sector_id_eia", "sector_name_eia", "total_fuel_consumption_quantity", + "balancing_authority_code_eia", + "early_release", "reporting_frequency_code", ] ) actual_cols = set(boiler_fuel_df.columns) @@ -834,9 +836,6 @@ def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFra # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ "fuel_unit", - "balancing_authority_code_eia", - "early_release", - "reporting_frequency_code", "data_maturity", ] bf_df.drop(cols_to_drop, axis=1, inplace=True) From 761ba7bd037b71a4d331e987b99fe418d1b3aba5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 May 2023 13:07:13 +0000 Subject: [PATCH 21/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/metadata/classes.py | 1 + src/pudl/transform/eia923.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 2d579af480..922561f284 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -1697,6 +1697,7 @@ def encode(self, df: pd.DataFrame) -> pd.DataFrame: ) return df + # ---- Package ---- # diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 5b97700609..a614758871 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -768,7 +768,8 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da "sector_name_eia", "total_fuel_consumption_quantity", "balancing_authority_code_eia", - "early_release", "reporting_frequency_code", + "early_release", + "reporting_frequency_code", ] ) actual_cols = set(boiler_fuel_df.columns) From 67105a1860fb555b36e8066e7f1c31db69d2bfe8 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Mon, 22 May 2023 16:16:37 -0600 Subject: [PATCH 22/26] Don't drop fields in clean_generation_eia923() --- src/pudl/package_data/eia923/column_maps/generator.csv | 4 ++-- src/pudl/transform/eia923.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/generator.csv b/src/pudl/package_data/eia923/column_maps/generator.csv index 949eb9f18f..a51008560b 100644 --- a/src/pudl/package_data/eia923/column_maps/generator.csv +++ b/src/pudl/package_data/eia923/column_maps/generator.csv @@ -2,8 +2,8 @@ year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index a614758871..461ed9ca6a 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -943,16 +943,6 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: raw_generator_eia923.dropna(subset=["generator_id"]) .drop( [ - "combined_heat_power", - "plant_name_eia", - "operator_name", - "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", - "net_generation_mwh_year_to_date", - "early_release", ], axis="columns", ) From ee0ae79d8f2d56552c2a6b51bdadb32e41cebb99 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 May 2023 22:17:40 +0000 Subject: [PATCH 23/26] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/eia923.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 461ed9ca6a..ad4c90520d 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -942,8 +942,7 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: gen_df = ( raw_generator_eia923.dropna(subset=["generator_id"]) .drop( - [ - ], + [], axis="columns", ) .pipe(_yearly_to_monthly_records) From d962d4f7bb8cc7bd48033dd4f216be36d1666f74 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Tue, 23 May 2023 11:24:26 -0600 Subject: [PATCH 24/26] Bug 509 work in clean_fuel_receipts_costs_eia923() --- .../package_data/eia923/column_maps/fuel_receipts_costs.csv | 4 ++-- src/pudl/transform/eia923.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv index 6e6b264f83..f6b25ef65e 100644 --- a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv +++ b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv @@ -21,8 +21,8 @@ ash_content_pct,average_ash_content,average_ash_content,average_ash_content,aver mercury_content_ppm,,,,,average_mercury_content,,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content fuel_cost_per_mmbtu,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id reporting_frequency_code,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency,reporting_frequency,respondent_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency primary_transportation_mode_code,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode secondary_transportation_mode_code,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index ad4c90520d..703502a4e2 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -1084,9 +1084,6 @@ def clean_fuel_receipts_costs_eia923( # Drop fields we're not inserting into the fuel_receipts_costs_eia923 # table. cols_to_drop = [ - "plant_state", - "operator_name", - "operator_id", "mine_id_msha", "mine_type_code", "state", @@ -1094,7 +1091,6 @@ def clean_fuel_receipts_costs_eia923( "state_id_fips", "mine_name", "regulated", - "early_release", ] cmi_df = ( From 1d466f13592cfdb8177e007b97c6165f0d26c202 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Wed, 24 May 2023 07:14:20 -0600 Subject: [PATCH 25/26] Don't drop fields in clean_generation_fuel_eia923() --- .../package_data/eia923/column_maps/generation_fuel.csv | 2 +- src/pudl/transform/eia923.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv index c518ef25b4..2fe4bfaadc 100644 --- a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv @@ -4,7 +4,7 @@ combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined nuclear_unit_id,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,state,state,state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 703502a4e2..47beb58693 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -618,19 +618,12 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ - "combined_heat_power", - "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", "fuel_unit", "total_fuel_consumption_quantity", "electric_fuel_consumption_quantity", "total_fuel_consumption_mmbtu", "elec_fuel_consumption_mmbtu", "net_generation_megawatthours", - "early_release", ] gen_fuel.drop(cols_to_drop, axis=1, inplace=True) From 15cb0c6122b8a548d00cbc4e4a35cd0cf4b62f75 Mon Sep 17 00:00:00 2001 From: Kurt Nordback Date: Wed, 24 May 2023 16:55:28 -0600 Subject: [PATCH 26/26] Remove a useless function call --- src/pudl/transform/eia923.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 47beb58693..279b89eee8 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -934,10 +934,6 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: """ gen_df = ( raw_generator_eia923.dropna(subset=["generator_id"]) - .drop( - [], - axis="columns", - ) .pipe(_yearly_to_monthly_records) .pipe(pudl.helpers.fix_eia_na) .pipe(pudl.helpers.convert_to_date)