diff --git a/.gitignore b/.gitignore index 569bc5a5..c2782d11 100644 --- a/.gitignore +++ b/.gitignore @@ -191,6 +191,7 @@ notes/ .DS_Store # ignore claude stuff .claude/settings.local.json +CLAUDE.local.md # ignore all pypsa output files *.nc diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py index bf26486a..67d6260c 100644 --- a/src/ispypsa/templater/create_template.py +++ b/src/ispypsa/templater/create_template.py @@ -30,6 +30,10 @@ _filter_flow_path_augmentations_to_granularity, _template_network_expansion, ) +from ispypsa.templater.new_entrants import ( + _template_generators_new_entrant, + _template_storage_new_entrant, +) from ispypsa.templater.nodes import ( _template_regions, _template_sub_regions, @@ -220,10 +224,6 @@ def create_ispypsa_inputs_template( template["network_expansion_options"] = expansion_options template["network_transmission_path_expansion_costs"] = expansion_costs - # todo: replace with actual generators_new_entrant once that templating - # function is written — passing empty placeholder for now so costs_connection - # is wired up but produces no VRE rows until generators are templated. - # connection_capacity_non_vre is in manually_extracted_template_tables/ (sourced from # ENOR tables 16-17 and confirmed with AEMO) but is needed as an iasr_tables input, # not a template output. 
TODO revisit when more manual tables added and consider @@ -232,8 +232,13 @@ def create_ispypsa_inputs_template( "connection_capacity_non_vre" ].copy() - generators_new_entrant = pd.DataFrame(columns=["geo_id", "technology"]) - storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"]) + # Identity columns only for now - not yet a templater output + generators_new_entrant = _template_generators_new_entrant( + iasr_tables["new_entrants_summary"] + ) + storage_new_entrant = _template_storage_new_entrant( + iasr_tables["new_entrants_summary"] + ) template["costs_connection"] = _template_connection_costs( iasr_tables, scenario, diff --git a/src/ispypsa/templater/custom_constraints_from_plexos.py b/src/ispypsa/templater/custom_constraints_from_plexos.py index c2a4672e..68c35fbe 100644 --- a/src/ispypsa/templater/custom_constraints_from_plexos.py +++ b/src/ispypsa/templater/custom_constraints_from_plexos.py @@ -176,6 +176,7 @@ import pandas as pd +from .helpers import _is_battery_row, _pick_location from .mappings import _CANONICAL_TIMESLICES # PLEXOS REZ-id prefixes that IASR renamed to DREZ. Applied to the first @@ -924,30 +925,6 @@ def _battery_to_location(new_entrants: pd.DataFrame) -> dict[str, str]: return dict(zip(batteries["IASR ID / DLT names"], locations)) -def _is_battery_row(new_entrants: pd.DataFrame) -> pd.Series: - """Boolean mask selecting battery rows in new_entrants_summary. - - Matches any Technology Type that contains the literal substring - "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular) - and "Distributed Resources Batteries" (plural). Other storage - technologies (pumped hydro, solar thermal) intentionally do not match. - """ - return new_entrants["Technology Type"].str.contains("Batter", na=False) - - -def _pick_location(row: pd.Series) -> str: - """Return REZ ID when populated, otherwise Sub-region. 
def _pick_location(row: pd.Series) -> str:
    """Return a technology's REZ ID when populated, otherwise its Sub-region.

    A REZ ID counts as populated when it is neither missing (NaN/None) nor the
    literal placeholder "Not Applicable".

    Args:
        row: A single table row holding the "REZ ID" and "Sub-region" labels.

    Returns:
        The "REZ ID" value when populated, otherwise the "Sub-region" value.
        The Sub-region is returned unchecked, so a missing Sub-region is
        passed through as-is.

    Raises:
        KeyError: If ``row`` lacks the "REZ ID" label, or lacks "Sub-region"
            when the fallback is needed.

    I/O Example:
        {"REZ ID": "Q8", "Sub-region": "SQ"} -> "Q8"
        {"REZ ID": "Not Applicable", "Sub-region": "SQ"} -> "SQ"
        {"REZ ID": None, "Sub-region": "SQ"} -> "SQ"
    """
    rez_id = row["REZ ID"]
    if pd.notna(rez_id) and rez_id != "Not Applicable":
        return rez_id
    return row["Sub-region"]


def _column_contains_literal(
    df: pd.DataFrame, col_to_check: str, substring: str
) -> pd.Series:
    """Boolean mask of rows whose ``col_to_check`` contains ``substring``.

    The match is case-sensitive and literal: ``regex=False`` stops pandas from
    interpreting ``substring`` as a regular expression, so characters such as
    "(" or "." in a technology name can never change what is matched. Missing
    values never match (``na=False``).
    """
    return df[col_to_check].str.contains(substring, na=False, regex=False)


def _is_battery_row(
    df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
    """Boolean mask selecting battery technology rows in ``df``.

    Matches any ``col_to_check`` row that contains the literal substring
    "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
    and "Distributed Resources Batteries" (plural). Other storage
    technologies (pumped hydro, solar thermal) intentionally do not match.

    Args:
        df: Table with a string column named ``col_to_check``.
        col_to_check: Column holding the technology description.

    Returns:
        Boolean Series aligned to ``df``'s index; missing values map to False.
    """
    return _column_contains_literal(df, col_to_check, "Batter")


def _is_pumped_hydro_row(
    df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
    """Boolean mask selecting pumped hydro technology rows in ``df``.

    Matches any ``col_to_check`` row that contains the literal substring
    "Pumped Hydro" -- covering all durations. Other storage technologies
    (batteries, solar thermal) intentionally do not match.

    Args:
        df: Table with a string column named ``col_to_check``.
        col_to_check: Column holding the technology description.

    Returns:
        Boolean Series aligned to ``df``'s index; missing values map to False.
    """
    return _column_contains_literal(df, col_to_check, "Pumped Hydro")


def _is_storage_row(
    df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
    """Boolean mask selecting storage rows: batteries or pumped hydro.

    Union of ``_is_battery_row`` and ``_is_pumped_hydro_row``. Solar thermal,
    although its technology name mentions storage, matches neither and is
    therefore not selected.

    Args:
        df: Table with a string column named ``col_to_check``.
        col_to_check: Column holding the technology description.

    Returns:
        Boolean Series aligned to ``df``'s index.
    """
    return _is_battery_row(df, col_to_check) | _is_pumped_hydro_row(df, col_to_check)
_SUMMARY_COLUMN_RENAMES = {
    "IASR ID / DLT names": "name",
    "Technology Type": "technology",
    "Fuel type": "fuel_type",
    "Fuel cost mapping": "fuel_price_mapping",
}

# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
# mapping for it if/when resource_limits templating requires one.
_RESOURCE_QUALITY_CODE_TO_TYPE = {
    "WH": "wind_high",
    "WM": "wind_medium",
    "WFX": "wind_offshore_fixed",
    "WFL": "wind_offshore_floating",
    "SAT": "solar",
    "CST": "solar",
}

# Regex extracting the resource-quality code embedded between underscores in a VRE
# IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". Derived from the code map, it
# expands to "_(WFX|WFL|SAT|...)_" — one capture group over the known codes
# sorted longest-first so a short code can't shadow a longer one it prefixes.
_RESOURCE_CODE_PATTERN = "_({})_".format(
    "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True))
)


# --- public orchestrators ---


# NOTE: partial scope intentional - other columns to be added in next PRs!
def _template_generators_new_entrant(
    new_entrants_summary: pd.DataFrame,
) -> pd.DataFrame:
    """Templates the new entrant generators identity table from the IASR summary.

    Keeps only non-storage rows, renames the carried-over summary columns to
    schema names, derives geo_id (REZ ID or sub-region) and resource_type (from
    the VRE resource code in the IASR ID), and returns the identity columns.
    The input table is not modified.

    Args:
        new_entrants_summary: IASR ``new_entrants_summary`` table. Must contain
            the columns "IASR ID / DLT names", "Technology Type", "Fuel type",
            "Fuel cost mapping", "REZ ID" and "Sub-region".

    Returns:
        One row per generator with the columns listed in
        ``_GENERATOR_IDENTITY_COLUMNS``, in that order. Rows keep the index
        labels they had in ``new_entrants_summary``.

    I/O Example:
        new_entrants_summary (abbr.):
            IASR ID / DLT names  Technology Type  REZ ID          Sub-region  Fuel type  Fuel cost mapping
            N3_WH_rez            Wind             N3              NNSW        Wind       Wind
            N3 Battery           Battery (2hrs)   N3              NNSW        Battery    Battery
            SQ CCGT              CCGT             Not Applicable  SQ          Gas        QLD new CCGT

        Returns:
            name       technology  resource_type  geo_id  fuel_type  fuel_price_mapping
            N3_WH_rez  Wind        wind_high      N3      Wind       Wind
            SQ CCGT    CCGT        NaN            SQ      Gas        QLD new CCGT
    """
    logging.info("Creating a template for new entrant generators")
    is_storage = _is_storage_row(new_entrants_summary)
    gens = new_entrants_summary[~is_storage].rename(columns=_SUMMARY_COLUMN_RENAMES)
    gens = _set_geo_id(gens)
    gens = _add_resource_type(gens)
    return gens[_GENERATOR_IDENTITY_COLUMNS]


# NOTE: partial scope intentional - other columns to be added in next PRs!
def _template_storage_new_entrant(
    new_entrants_summary: pd.DataFrame,
) -> pd.DataFrame:
    """Templates the new entrant storage identity table from the IASR summary.

    Keeps only storage rows (batteries and pumped hydro), renames the
    carried-over summary columns to schema names, derives geo_id (REZ ID or
    sub-region), and returns the identity columns. The input table is not
    modified.

    Args:
        new_entrants_summary: IASR ``new_entrants_summary`` table. Must contain
            the columns "IASR ID / DLT names", "Technology Type", "Fuel type",
            "REZ ID" and "Sub-region".

    Returns:
        One row per storage unit with the columns listed in
        ``_STORAGE_IDENTITY_COLUMNS``, in that order. Rows keep the index
        labels they had in ``new_entrants_summary``.

    I/O Example:
        new_entrants_summary (abbr.):
            IASR ID / DLT names  Technology Type  REZ ID          Sub-region  Fuel type  Fuel cost mapping
            N3_WH_rez            Wind             N3              NNSW        Wind       Wind
            N3 Battery           Battery (2hrs)   N3              NNSW        Battery    Battery
            SQ CCGT              CCGT             Not Applicable  SQ          Gas        QLD new CCGT

        Returns:
            name        technology      geo_id  fuel_type
            N3 Battery  Battery (2hrs)  N3      Battery
    """
    logging.info("Creating a template for new entrant storage")
    is_storage = _is_storage_row(new_entrants_summary)
    storage = new_entrants_summary[is_storage].rename(columns=_SUMMARY_COLUMN_RENAMES)
    storage = _set_geo_id(storage)
    return storage[_STORAGE_IDENTITY_COLUMNS]


# --- shared helpers ---


def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame:
    """Returns a copy of new_entrants with a 'geo_id' column added.

    'geo_id' is the row's REZ ID with Sub-region as fallback, obtained by
    applying the ``_pick_location`` helper to each row. The input frame is left
    untouched, matching ``_add_resource_type``.

    Args:
        new_entrants: Table containing "REZ ID" and "Sub-region" columns.

    Returns:
        A new DataFrame with every input column plus 'geo_id'. A zero-row input
        yields a zero-row output that still carries the 'geo_id' column.
    """
    with_geo_id = new_entrants.copy()
    if with_geo_id.index.empty:
        # With no rows, apply(axis=1) has nothing to call the helper on and
        # infers its result type instead, which can come back as a DataFrame
        # rather than a Series. Assign an explicit empty column so the output
        # schema is deterministic.
        with_geo_id["geo_id"] = pd.Series(index=with_geo_id.index, dtype=object)
        return with_geo_id
    with_geo_id["geo_id"] = with_geo_id.apply(_pick_location, axis=1)
    return with_geo_id


# --- generator-specific helpers ---


def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
    """Returns a copy of gens with the VRE ``resource_type`` column added.

    VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH"
    in "Q1_WH_Far North QLD". The first such code found in ``name`` is extracted
    and mapped via ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with no matching code —
    the underscore-free thermal and distributed-resource rows — get NaN.

    Args:
        gens: Generator table with a string ``name`` column.

    Returns:
        A new DataFrame with every input column plus ``resource_type``.

    I/O Example:
        gens:
            name                              technology
            Q1_WH_Far North QLD               Wind
            Q1_WM_Far North QLD               Wind
            N10_WFX_Hunter Coast              Wind - offshore (fixed)
            DREZ_SAT_Dubbo                    Large scale Solar PV
            N0_CST_NSW                        Solar Thermal (16hrs storage)
            CNSW SAT - Distributed Resources  Distributed Resources Solar
            CNSW OCGT Small                   OCGT (small GT)

        returns (adds resource_type):
            name                              technology                     resource_type
            Q1_WH_Far North QLD               Wind                           wind_high
            Q1_WM_Far North QLD               Wind                           wind_medium
            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar  # CST -> solar
            CNSW SAT - Distributed Resources  Distributed Resources Solar    NaN    # no _ token
            CNSW OCGT Small                   OCGT (small GT)                NaN    # no _ token
    """
    gens = gens.copy()
    # expand=False returns a Series (single capture group) rather than a frame.
    resource_code = gens["name"].str.extract(_RESOURCE_CODE_PATTERN, expand=False)
    gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE)
    return gens
technology: type: string required: true diff --git a/tests/test_iasr_table_caching/test_local_cache.py b/tests/test_iasr_table_caching/test_local_cache.py index 28670f81..5e43df7f 100644 --- a/tests/test_iasr_table_caching/test_local_cache.py +++ b/tests/test_iasr_table_caching/test_local_cache.py @@ -27,6 +27,8 @@ def test_build_required_tables_new_format(): assert "connection_costs_for_wind_and_solar" in result assert "connection_costs_other" in result assert "efficient_level_of_system_strength_cost" in result + # New entrant generator summary feeds the generators_new_entrant templater + assert "new_entrants_summary" in result def test_build_required_tables_old_format(): diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py index a935853c..28d5400c 100644 --- a/tests/test_templater/test_create_ispypsa_inputs_template.py +++ b/tests/test_templater/test_create_ispypsa_inputs_template.py @@ -215,17 +215,24 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW 
OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW + """) with ( patch( @@ -255,6 +262,7 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + "new_entrants_summary": new_entrants_summary, }, # connection_capacity_non_vre is popped out of manually_extracted_tables # into iasr_tables by create_template; supplied so the @@ -301,7 +309,6 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): # 3 expansion_ids x 2 years assert len(expansion_costs) == 6 - assert "costs_connection" in result costs_connection = result["costs_connection"] assert set(costs_connection.columns) == { "geo_id", @@ -310,10 +317,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): "connection_cost", "system_strength_cost", } - # costs_connection key present with correct columns; currently - # generators_new_entrant is placeholder (empty) so no VRE rows are produced - # yet (but no errors either). - assert costs_connection.empty + # [(2 VRE) x (1 REZ) + (1 non-VRE) x (2 subregions)] x 2 years + assert set(costs_connection["geo_id"]) == {"Q1", "CNSW", "SNW"} + assert len(costs_connection) == 8 # Custom-constraints tables are spliced into the output via # template.update(template_custom_constraints_from_plexos(...)). 
The @@ -403,17 +409,24 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW + """) with ( patch( @@ -443,6 +456,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + "new_entrants_summary": new_entrants_summary, }, manually_extracted_tables={ "connection_capacity_non_vre": connection_capacity_non_vre, @@ -476,6 +490,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): assert set(expansion_costs["expansion_id"]) == {"NSW-QLD", "N3-NSW"} # 2 expansion_ids x 2 years assert len(expansion_costs) == 4 + costs_connection = result["costs_connection"] assert set(costs_connection.columns) == { "geo_id", @@ -484,7 +499,10 @@ def 
test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): "connection_cost", "system_strength_cost", } - assert costs_connection.empty + # REZ geo_ids (Q1) are granularity-invariant, subregions in same region collapse + # [(2 VRE x 1 REZ) + (1 non-VRE x 1 subregion)] x 2 years + assert set(costs_connection["geo_id"]) == {"Q1", "NSW"} + assert len(costs_connection) == 6 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation once sub-regions are collapsed, so the templater @@ -543,17 +561,24 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW + """) with ( patch( @@ -581,6 +606,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + 
"new_entrants_summary": new_entrants_summary, }, manually_extracted_tables={ "connection_capacity_non_vre": connection_capacity_non_vre, @@ -613,6 +639,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): assert set(expansion_costs["expansion_id"]) == {"N3-NEM"} # 1 expansion_id x 2 years assert len(expansion_costs) == 2 + connection_costs = result["costs_connection"] assert set(connection_costs.columns) == { "geo_id", @@ -621,7 +648,9 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): "connection_cost", "system_strength_cost", } - assert connection_costs.empty + # [(2 VRE x 1 REZ) + (1 non-VRE x NEM)] x 2 years + assert set(connection_costs["geo_id"]) == {"Q1", "NEM"} + assert len(connection_costs) == 6 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation at single_region, so the templater skips them. diff --git a/tests/test_templater/test_custom_constraints_from_plexos.py b/tests/test_templater/test_custom_constraints_from_plexos.py index 2c3ae7a9..ce377382 100644 --- a/tests/test_templater/test_custom_constraints_from_plexos.py +++ b/tests/test_templater/test_custom_constraints_from_plexos.py @@ -31,12 +31,10 @@ _generator_to_location, _iasr_id_choices, _inject_iasr_new_entrant_batteries, - _is_battery_row, _line_variable_name, _location_battery_pairs, _log_injected_batteries, _match_unit_name, - _pick_location, _plexos_extract_dir, _rename_battery_name, _rename_first_token, @@ -830,42 +828,6 @@ def test_build_custom_constraints_rhs_maps_to_region_prefixed_canonical_timeslic pd.testing.assert_frame_equal(result, expected) -# --- _is_battery_row --- - - -def test_is_battery_row(csv_str_to_df): - new_entrants = csv_str_to_df(""" - IASR ID / DLT names, Technology Type - Q1 Battery - 2h, Battery Storage (2hrs storage) - NQ Battery - Dist, Distributed Resources Batteries - Q1 Wind, Wind - N1 Pumped Hydro - 24h,Pumped Hydro (24hrs storage) - Q1 Solar 
Thermal, Solar Thermal (16hrs storage) - """) - - result = _is_battery_row(new_entrants) - - # Battery + Distributed Resources Batteries match; others (incl. pumped - # hydro and solar thermal storage) do not. - assert list(result) == [True, True, False, False, False] - - -# --- _pick_location --- - - -@pytest.mark.parametrize( - "rez_id, sub_region, expected", - [ - ("Q8", "SQ", "Q8"), # REZ ID populated -> REZ ID - ("Not Applicable", "SQ", "SQ"), # 'Not Applicable' -> Sub-region - (None, "SQ", "SQ"), # NaN/None -> Sub-region - ], -) -def test_pick_location(rez_id, sub_region, expected): - row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region}) - assert _pick_location(row) == expected - - # --- _generator_to_location --- diff --git a/tests/test_templater/test_helpers.py b/tests/test_templater/test_helpers.py index 2696aa3c..6d474501 100644 --- a/tests/test_templater/test_helpers.py +++ b/tests/test_templater/test_helpers.py @@ -2,8 +2,12 @@ import pytest from ispypsa.templater.helpers import ( + _is_battery_row, + _is_pumped_hydro_row, + _is_storage_row, _looks_like_financial_year, _manual_remove_footnotes_from_generator_names, + _pick_location, _rez_name_to_id_mapping, _snakecase_string, _standardise_storage_capitalisation, @@ -373,3 +377,78 @@ def test_looks_like_financial_year_matches_only_canonical_formats(): assert _looks_like_financial_year("24-25") is False assert _looks_like_financial_year("Status") is False assert _looks_like_financial_year("Flow path") is False + + +# --- _pick_location --- + + +@pytest.mark.parametrize( + "rez_id, sub_region, expected", + [ + ("Q8", "SQ", "Q8"), # REZ ID populated -> REZ ID + ("Not Applicable", "SQ", "SQ"), # 'Not Applicable' -> Sub-region + (None, "SQ", "SQ"), # NaN/None -> Sub-region + ], +) +def test_pick_location(rez_id, sub_region, expected): + row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region}) + assert _pick_location(row) == expected + + +# --- _is_battery_row --- + + +def 
test_is_battery_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology Type + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_battery_row(new_entrants) + + # Battery + Distributed Resources Batteries match; others (incl. pumped + # hydro and solar thermal storage) do not. + assert list(result) == [True, True, False, False, False] + + +# --- _is_pumped_hydro_row --- + + +def test_is_pumped_hydro_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology Type + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_pumped_hydro_row(new_entrants) + + # Pumped Hydro resources match; Batter* and other storage do not. + assert list(result) == [False, False, False, True, False] + + +# --- _is_storage_row --- + + +def test_is_storage_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_storage_row(new_entrants, col_to_check="Technology") + + # Battery, Distributed Resources Batteries and Pumped Hydro all match. + # Solar thermal still does not. 
+ assert list(result) == [True, True, False, True, False] diff --git a/tests/test_templater/test_new_entrants.py b/tests/test_templater/test_new_entrants.py new file mode 100644 index 00000000..2b32d386 --- /dev/null +++ b/tests/test_templater/test_new_entrants.py @@ -0,0 +1,131 @@ +import pandas as pd +import pytest + +from ispypsa.templater.new_entrants import ( + _GENERATOR_IDENTITY_COLUMNS, + _STORAGE_IDENTITY_COLUMNS, + _add_resource_type, + _set_geo_id, + _template_generators_new_entrant, + _template_storage_new_entrant, +) + +# --- orchestrators --- + + +def test_template_generators_new_entrant(csv_str_to_df): + # Wiring check only (per-helper behaviour is covered below): storage is dropped, + # the identity columns are produced, and one row per surviving generating unit + # is returned. Detailed content is covered by the per-helper tests. + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_WM_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + NQ OCGT Small, OCGT (small GT), Gas, QLD new OCGT, Not Applicable, NQ + NQ SAT - Distributed Resources, Distributed Resources Solar, Solar, Solar, Not Applicable, NQ + NQ Battery 2hrs, Battery Storage (2hrs storage), Battery, Battery, Not Applicable, NQ + """) + + result = _template_generators_new_entrant(new_entrants_summary) + + # storage row dropped -> 5 of 6 rows survive; identity columns produced in order + assert list(result.columns) == _GENERATOR_IDENTITY_COLUMNS + assert len(result) == 5 + + +def test_template_storage_new_entrant(csv_str_to_df): + # Wiring check only (per-helper behaviour is covered below): generators are + # dropped, the identity columns are produced, and one row per surviving storage + # unit is returned. Detailed content is covered by the per-helper tests. 
+ new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + NQ OCGT Small, OCGT (small GT), Gas, QLD new OCGT, Not Applicable, NQ + NQ Battery 2hrs, Battery Storage (2hrs storage), Battery, Battery, N3, NQ + NQ Battery - Distributed, Distributed Resources Batteries, Battery, Battery, Not Applicable, NQ + Snowy PH 24hr, Pumped Hydro (24hrs storage), Water, Water, Not Applicable, NQ + """) + + result = _template_storage_new_entrant(new_entrants_summary) + + # generator rows dropped -> 3 of 5 rows survive; identity columns produced in order + assert list(result.columns) == _STORAGE_IDENTITY_COLUMNS + assert len(result) == 3 + + +# --- _set_geo_id --- + + +def test_set_geo_id(csv_str_to_df): + # Check that the wrapper adds 'geo_id' column, correctly applying ``_pick_location`` + # and not impacting existing columns. + new_entrants = csv_str_to_df(""" + technology, REZ ID, Sub-region + Wind, N3, CNSW + OCGT (small GT), Not Applicable, NQ + """) + + result = _set_geo_id(new_entrants) + + expected = csv_str_to_df(""" + technology, REZ ID, Sub-region, geo_id + Wind, N3, CNSW, N3 + OCGT (small GT), Not Applicable, NQ, NQ + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_set_geo_id_empty_input(csv_str_to_df): + # Empty input still returns the added geo_id column + new_entrants = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"]) + + result = _set_geo_id(new_entrants) + + expected = csv_str_to_df(""" + technology, REZ ID, Sub-region, geo_id + """) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + +# --- _add_resource_type (generator-specific) --- + + +def test_add_resource_type(csv_str_to_df): + # resource_type is read from the underscore-delimited code in `name`. WH/WM are + # already separate rows (no explosion). 
CST (solar thermal) maps to "solar"; the + # underscore-free thermal / distributed IDs map to NaN (blank field). + gens = csv_str_to_df(""" + name, technology + Q1_WH_Far North QLD, Wind + Q1_WM_Far North QLD, Wind + N10_WFX_Hunter Coast, Wind - offshore (fixed) + DREZ_SAT_Dubbo, Large scale Solar PV + N0_CST_NSW, Solar Thermal (16hrs storage) + CNSW SAT - Distributed Resources, Distributed Resources Solar + CNSW OCGT Small, OCGT (small GT) + """) + + result = _add_resource_type(gens) + + expected = csv_str_to_df(""" + name, technology, resource_type + Q1_WH_Far North QLD, Wind, wind_high + Q1_WM_Far North QLD, Wind, wind_medium + N10_WFX_Hunter Coast, Wind - offshore (fixed), wind_offshore_fixed + DREZ_SAT_Dubbo, Large scale Solar PV, solar + N0_CST_NSW, Solar Thermal (16hrs storage), solar + CNSW SAT - Distributed Resources, Distributed Resources Solar, + CNSW OCGT Small, OCGT (small GT), + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_add_resource_type_empty_input(): + # test empty input still returns the input df columns + resource_type column + empty_input = pd.DataFrame(columns=["name", "technology"]) + + result = _add_resource_type(empty_input) + + expected = pd.DataFrame(columns=["name", "technology", "resource_type"]) + pd.testing.assert_frame_equal(result, expected)