diff --git a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv index 6f8ba8f4e9..cb20dd38db 100644 --- a/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/boiler_fuel.csv @@ -1,9 +1,9 @@ year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id -combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant +associated_combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv index 6e6b264f83..f6b25ef65e 100644 --- a/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv +++ b/src/pudl/package_data/eia923/column_maps/fuel_receipts_costs.csv @@ -21,8 +21,8 @@ ash_content_pct,average_ash_content,average_ash_content,average_ash_content,aver mercury_content_ppm,,,,,average_mercury_content,,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content,average_mercury_content fuel_cost_per_mmbtu,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost,fuel_cost regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated,regulated -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id reporting_frequency_code,respondent_frequency,respondent_frequency,respondent_frequency,respondent_frequency,reporting_frequency,respondent_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency,reporting_frequency primary_transportation_mode_code,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode,primary_transportation_mode secondary_transportation_mode_code,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode,secondary_transportation_mode diff --git a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv index 4def3915bf..2fe4bfaadc 100644 --- a/src/pudl/package_data/eia923/column_maps/generation_fuel.csv +++ b/src/pudl/package_data/eia923/column_maps/generation_fuel.csv @@ -3,8 +3,8 @@ plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plan combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant nuclear_unit_id,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_i_d,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id,nuclear_unit_id plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,state,state,state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/package_data/eia923/column_maps/generator.csv b/src/pudl/package_data/eia923/column_maps/generator.csv index 949eb9f18f..a51008560b 100644 --- a/src/pudl/package_data/eia923/column_maps/generator.csv +++ b/src/pudl/package_data/eia923/column_maps/generator.csv @@ -2,8 +2,8 @@ year_index,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021 plant_id_eia,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id,plant_id combined_heat_power,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant,combined_heat_and_power_plant plant_name_eia,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name,plant_name -operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name -operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id +utility_name_eia,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name,operator_name +utility_id_eia,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id,operator_id plant_state,state,state,state,state,plant_state,state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state,plant_state census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region,census_region nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region,nerc_region diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 4eff62eb7f..279b89eee8 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -618,21 +618,12 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ - "combined_heat_power", - "plant_name_eia", - "operator_name", - "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", "fuel_unit", "total_fuel_consumption_quantity", "electric_fuel_consumption_quantity", "total_fuel_consumption_mmbtu", "elec_fuel_consumption_mmbtu", "net_generation_megawatthours", - "early_release", ] gen_fuel.drop(cols_to_drop, axis=1, inplace=True) @@ -764,7 +755,15 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da quantity_cols + relative_cols + key_cols - + ["prime_mover_code", "sector_id_eia", "sector_name_eia"] + + [ + "prime_mover_code", + "sector_id_eia", + "sector_name_eia", + "total_fuel_consumption_quantity", + "balancing_authority_code_eia", + "early_release", + "reporting_frequency_code", + ] ) actual_cols = set(boiler_fuel_df.columns) difference = actual_cols.symmetric_difference(expected_cols) @@ -830,19 +829,7 @@ def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFra # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 cols_to_drop = [ - "combined_heat_power", - "plant_name_eia", - "operator_name", - "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", "fuel_unit", - "total_fuel_consumption_quantity", - "balancing_authority_code_eia", - "early_release", - "reporting_frequency_code", "data_maturity", ] bf_df.drop(cols_to_drop, axis=1, inplace=True) @@ -947,21 +934,6 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: """ gen_df = ( raw_generator_eia923.dropna(subset=["generator_id"]) - .drop( - [ - "combined_heat_power", - "plant_name_eia", - "operator_name", - "operator_id", - "plant_state", - "census_region", - "nerc_region", - "naics_code", - "net_generation_mwh_year_to_date", - "early_release", - ], - axis="columns", - ) .pipe(_yearly_to_monthly_records) .pipe(pudl.helpers.fix_eia_na) .pipe(pudl.helpers.convert_to_date) @@ -1101,10 +1073,6 @@ def clean_fuel_receipts_costs_eia923( # Drop fields we're not inserting into the fuel_receipts_costs_eia923 # table. cols_to_drop = [ - "plant_name_eia", - "plant_state", - "operator_name", - "operator_id", "mine_id_msha", "mine_type_code", "state", @@ -1112,7 +1080,6 @@ def clean_fuel_receipts_costs_eia923( "state_id_fips", "mine_name", "regulated", - "early_release", ] cmi_df = (