From 8fd519f6adadd45725686051bfa964a085ebf25b Mon Sep 17 00:00:00 2001 From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:16:59 +1000 Subject: [PATCH 1/5] add ClAUDE.local.md to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 569bc5a5..c2782d11 100644 --- a/.gitignore +++ b/.gitignore @@ -191,6 +191,7 @@ notes/ .DS_Store # ignore claude stuff .claude/settings.local.json +CLAUDE.local.md # ignore all pypsa output files *.nc From d4c381095a82659817560f829d834676a956ae58 Mon Sep 17 00:00:00 2001 From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com> Date: Thu, 11 Jun 2026 11:31:30 +1000 Subject: [PATCH 2/5] add identity templating for new entrant generators --- src/ispypsa/templater/create_template.py | 15 +- .../templater/generators_new_entrant.py | 179 ++++++++++++++++++ .../test_local_cache.py | 2 + .../test_create_ispypsa_inputs_template.py | 47 ++++- .../test_generators_new_entrant.py | 165 ++++++++++++++++ 5 files changed, 397 insertions(+), 11 deletions(-) create mode 100644 src/ispypsa/templater/generators_new_entrant.py create mode 100644 tests/test_templater/test_generators_new_entrant.py diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py index bf26486a..d3e1fe71 100644 --- a/src/ispypsa/templater/create_template.py +++ b/src/ispypsa/templater/create_template.py @@ -21,6 +21,9 @@ _template_sub_regional_flow_path_costs, _template_sub_regional_flow_paths, ) +from ispypsa.templater.generators_new_entrant import ( + _template_generators_new_entrant, +) from ispypsa.templater.geography import _template_network_geography from ispypsa.templater.network_expansion import ( _extract_flow_path_costs_from_iasr, @@ -220,10 +223,6 @@ def create_ispypsa_inputs_template( template["network_expansion_options"] = expansion_options template["network_transmission_path_expansion_costs"] = expansion_costs - # todo: replace with actual generators_new_entrant once that templating - # function is written — passing empty placeholder for now so costs_connection - # is wired up but produces no VRE rows until generators are templated. - # connection_capacity_non_vre is in manually_extracted_template_tables/ (sourced from # ENOR tables 16-17 and confirmed with AEMO) but is needed as an iasr_tables input, # not a template output. TODO revisit when more manual tables added and consider @@ -232,7 +231,13 @@ def create_ispypsa_inputs_template( "connection_capacity_non_vre" ].copy() - generators_new_entrant = pd.DataFrame(columns=["geo_id", "technology"]) + # Identity columns only for now (name, technology, resource_type, geo_id, + # fuel_type, fuel_price_mapping); cost/property columns are added in later + # PRs. Feeds costs_connection but is not yet a written template output. + generators_new_entrant = _template_generators_new_entrant( + iasr_tables["new_entrants_summary"] + ) + # storage_new_entrant remains defined (empty) for wiring tests storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"]) template["costs_connection"] = _template_connection_costs( iasr_tables, diff --git a/src/ispypsa/templater/generators_new_entrant.py b/src/ispypsa/templater/generators_new_entrant.py new file mode 100644 index 00000000..c0b73e99 --- /dev/null +++ b/src/ispypsa/templater/generators_new_entrant.py @@ -0,0 +1,179 @@ +# Templates the `generators_new_entrant` table: one row per new entrant generating +# unit, with storage technologies excluded (those are templated separately into the +# storage new-entrants table). See schemas/generators_new_entrant.yaml for the target. +import logging + +import pandas as pd + +from ispypsa.templater.helpers import _where_any_substring_appears + +_IDENTITY_COLUMNS = [ + "name", + "technology", + "resource_type", + "geo_id", + "fuel_type", + "fuel_price_mapping", +] + +_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"] + +# Source (IASR new_entrants_summary) column names → schema output column names. +# The summary's own values are treated as canonical; no cross-table canonicalisation +# is applied here. "IASR ID / DLT names" is an existing unique identifier per row. +_SUMMARY_COLUMN_RENAMES = { + "IASR ID / DLT names": "name", + "Technology Type": "technology", + "Fuel type": "fuel_type", + "Fuel cost mapping": "fuel_price_mapping", +} + +# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a +# mapping for it if/when resource_limits templating requires one. +_RESOURCE_QUALITY_CODE_TO_TYPE = { + "WH": "wind_high", + "WM": "wind_medium", + "WFX": "wind_offshore_fixed", + "WFL": "wind_offshore_floating", + "SAT": "solar", + "CST": "solar", +} + + +# NOTE: partial scope intentional - other columns to be added in next PRs! +def _template_generators_new_entrant( + new_entrants_summary: pd.DataFrame, +) -> pd.DataFrame: + """Templates the new entrant generators identity table from the IASR summary. + + Drops storage, renames the carried-over summary columns to schema names, derives + geo_id (REZ ID or sub-region) and resource_type (from the VRE resource code in + the IASR ID), and returns the identity columns. + + Args: + new_entrants_summary: IASR ``new_entrants_summary`` table. + + Returns: + One row per generating unit with columns ``_IDENTITY_COLUMNS``. + """ + logging.info("Creating a template for new entrant generators") + gens = _drop_storage_technologies(new_entrants_summary) + gens = _rename_summary_columns(gens) + gens = _set_geo_id(gens) + gens = _add_resource_type(gens) + return gens[_IDENTITY_COLUMNS] + + +def _drop_storage_technologies(new_entrants_summary: pd.DataFrame) -> pd.DataFrame: + """Drops storage rows from the new entrants summary, keeping only generators. + + Storage (batteries, distributed batteries, pumped hydro) is templated into the + storage new-entrants table, so it is removed here. Matching is case-insensitive + on the "Technology Type" column (see ``_STORAGE_TECHNOLOGY_STRINGS``). + + I/O Example: + new_entrants_summary: + Technology Type REZ ID + Wind N3 + Large scale Solar PV N3 + Battery Storage (2hrs storage) N3 # storage: dropped + Distributed Resources Batteries Not Applicable # storage: dropped + Pumped Hydro (24hrs storage) Not Applicable # storage: dropped + OCGT (small GT) Not Applicable + + returns: + Technology Type REZ ID + Wind N3 + Large scale Solar PV N3 + OCGT (small GT) Not Applicable + """ + is_storage = _where_any_substring_appears( + new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS + ) + return new_entrants_summary.loc[~is_storage].reset_index(drop=True) + + +def _rename_summary_columns(gens: pd.DataFrame) -> pd.DataFrame: + """Renames the summary's identifier, technology and fuel columns to schema names. + + See ``_SUMMARY_COLUMN_RENAMES``. Other columns (e.g. "REZ ID", "Sub-region", + still needed to derive geo_id) pass through untouched. + + I/O Example: + gens: + IASR ID / DLT names Technology Type Fuel type Fuel cost mapping REZ ID + Q1_WH_Far North QLD Wind Wind Wind Q1 + CNSW OCGT Small OCGT (small GT) Gas NSW new OCGT Not Applicable + + returns: + name technology fuel_type fuel_price_mapping REZ ID + Q1_WH_Far North QLD Wind Wind Wind Q1 + CNSW OCGT Small OCGT (small GT) Gas NSW new OCGT Not Applicable + """ + return gens.rename(columns=_SUMMARY_COLUMN_RENAMES) + + +def _set_geo_id(gens: pd.DataFrame) -> pd.DataFrame: + """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region. + + REZ-located generators (VRE) carry a real "REZ ID"; thermal and distributed + resource rows have "REZ ID" == "Not Applicable" and sit at the sub-region, so + they take their "Sub-region" value instead. Non-REZ IDs (e.g. N0, V0) flow + through unchanged as REZ IDs. + + I/O Example: + gens: + technology REZ ID Sub-region + Wind N3 CNSW + Large scale Solar PV N0 CNSW # Non-REZ: kept as-is + OCGT (small GT) Not Applicable NQ + Distributed Resources Solar Not Applicable SQ + + returns (adds geo_id): + technology REZ ID Sub-region geo_id + Wind N3 CNSW N3 + Large scale Solar PV N0 CNSW N0 + OCGT (small GT) Not Applicable NQ NQ + Distributed Resources Solar Not Applicable SQ SQ + """ + gens = gens.copy() + gens["geo_id"] = gens["REZ ID"].where( + gens["REZ ID"] != "Not Applicable", gens["Sub-region"] + ) + return gens + + +def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame: + """Adds the VRE ``resource_type`` column from the resource code in ``name``. + + VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH" + in "Q1_WH_Far North QLD" (wind high) or "SAT" in "DREZ_SAT_Dubbo" (solar). The + code is extracted and mapped via ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with + no matching code — the underscore-free thermal and distributed-resource rows — + get NaN, meaning no VRE build-limit applies. + + I/O Example: + gens: + name technology + Q1_WH_Far North QLD Wind + Q1_WM_Far North QLD Wind + N10_WFX_Hunter Coast Wind - offshore (fixed) + DREZ_SAT_Dubbo Large scale Solar PV + N0_CST_NSW Solar Thermal (16hrs storage) + CNSW SAT - Distributed Resources Distributed Resources Solar + CNSW OCGT Small OCGT (small GT) + + returns (adds resource_type): + name technology resource_type + Q1_WH_Far North QLD Wind wind_high + Q1_WM_Far North QLD Wind wind_medium + N10_WFX_Hunter Coast Wind - offshore (fixed) wind_offshore_fixed + DREZ_SAT_Dubbo Large scale Solar PV solar + N0_CST_NSW Solar Thermal (16hrs storage) solar # CST -> solar + CNSW SAT - Distributed Resources Distributed Resources Solar NaN # no _ token + CNSW OCGT Small OCGT (small GT) NaN # no _ token + """ + gens = gens.copy() + resource_code = gens["name"].str.extract(r"_(WH|WM|WFX|WFL|SAT|CST)_", expand=False) + gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE) + return gens diff --git a/tests/test_iasr_table_caching/test_local_cache.py b/tests/test_iasr_table_caching/test_local_cache.py index 28670f81..5e43df7f 100644 --- a/tests/test_iasr_table_caching/test_local_cache.py +++ b/tests/test_iasr_table_caching/test_local_cache.py @@ -27,6 +27,8 @@ def test_build_required_tables_new_format(): assert "connection_costs_for_wind_and_solar" in result assert "connection_costs_other" in result assert "efficient_level_of_system_strength_cost" in result + # New entrant generator summary feeds the generators_new_entrant templater + assert "new_entrants_summary" in result def test_build_required_tables_old_format(): diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py index a935853c..0c13d968 100644 --- a/tests/test_templater/test_create_ispypsa_inputs_template.py +++ b/tests/test_templater/test_create_ispypsa_inputs_template.py @@ -226,6 +226,16 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): label, 2024-25 IBR, 10 """) + # Two VRE generators at Q1 (which has a connection cost forecast) so + # costs_connection produces rows; an OCGT and a storage row check that + # non-VRE pass through and storage is dropped. + new_entrants_summary = csv_str_to_df(""" + IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ + NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ + NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + """) with ( patch( @@ -255,6 +265,7 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + "new_entrants_summary": new_entrants_summary, }, # connection_capacity_non_vre is popped out of manually_extracted_tables # into iasr_tables by create_template; supplied so the @@ -310,10 +321,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): "connection_cost", "system_strength_cost", } - # costs_connection key present with correct columns; currently - # generators_new_entrant is placeholder (empty) so no VRE rows are produced - # yet (but no errors either). - assert costs_connection.empty + assert set(costs_connection["geo_id"]) == {"Q1"} + assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"} + assert len(costs_connection) == 2 # Custom-constraints tables are spliced into the output via # template.update(template_custom_constraints_from_plexos(...)). The @@ -414,6 +424,13 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): label, 2024-25 IBR, 10 """) + new_entrants_summary = csv_str_to_df(""" + IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ + NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ + NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + """) with ( patch( @@ -443,6 +460,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + "new_entrants_summary": new_entrants_summary, }, manually_extracted_tables={ "connection_capacity_non_vre": connection_capacity_non_vre, @@ -484,7 +502,11 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): "connection_cost", "system_strength_cost", } - assert costs_connection.empty + # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1 + # still produce two connection-cost rows at nem_regions granularity. + assert set(costs_connection["geo_id"]) == {"Q1"} + assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"} + assert len(costs_connection) == 2 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation once sub-regions are collapsed, so the templater @@ -554,6 +576,13 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): label, 2024-25 IBR, 10 """) + new_entrants_summary = csv_str_to_df(""" + IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ + NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ + NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + """) with ( patch( @@ -581,6 +610,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar, "connection_cost_forecast_other": connection_cost_forecast_other, "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost, + "new_entrants_summary": new_entrants_summary, }, manually_extracted_tables={ "connection_capacity_non_vre": connection_capacity_non_vre, @@ -613,6 +643,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): assert set(expansion_costs["expansion_id"]) == {"N3-NEM"} # 1 expansion_id x 2 years assert len(expansion_costs) == 2 + connection_costs = result["costs_connection"] assert set(connection_costs.columns) == { "geo_id", @@ -621,7 +652,11 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): "connection_cost", "system_strength_cost", } - assert connection_costs.empty + # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1 + # still produce two connection-cost rows at single_region granularity. + assert set(connection_costs["geo_id"]) == {"Q1"} + assert set(connection_costs["technology"]) == {"Wind", "Large scale Solar PV"} + assert len(connection_costs) == 2 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation at single_region, so the templater skips them. diff --git a/tests/test_templater/test_generators_new_entrant.py b/tests/test_templater/test_generators_new_entrant.py new file mode 100644 index 00000000..9973ad33 --- /dev/null +++ b/tests/test_templater/test_generators_new_entrant.py @@ -0,0 +1,165 @@ +import pandas as pd + +from ispypsa.templater.generators_new_entrant import ( + _IDENTITY_COLUMNS, + _add_resource_type, + _drop_storage_technologies, + _rename_summary_columns, + _set_geo_id, + _template_generators_new_entrant, +) + +# --- _template_generators_new_entrant (orchestrator) --- + + +def test_template_generators_new_entrant(csv_str_to_df): + # Wiring check only (per-helper behaviour is covered above): storage is dropped, + # the identity columns are produced, and one row per surviving generating unit + # is returned. Detailed content is covered by the per-helper tests. + new_entrants_summary = csv_str_to_df(""" + IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ + Q1_WM_Far__North__QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ + NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ + NQ__SAT__-__Distributed__Resources,Distributed__Resources__Solar, Solar, Solar, Not__Applicable, NQ + NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + """) + + result = _template_generators_new_entrant(new_entrants_summary) + + # storage row dropped -> 5 of 6 rows survive; identity columns produced in order + assert list(result.columns) == _IDENTITY_COLUMNS + assert len(result) == 5 + + +# --- _drop_storage_technologies --- + + +def test_drop_storage_technologies(csv_str_to_df): + # All storage variants (batteries, distributed batteries, pumped hydro) are + # dropped; generation rows pass through unchanged with other columns intact. + new_entrants_summary = csv_str_to_df(""" + Technology__Type, REZ__ID + Wind, N3 + Large__scale__Solar__PV, N3 + Battery__Storage__(2hrs__storage), N3 + Distributed__Resources__Batteries, Not__Applicable + Pumped__Hydro__(24hrs__storage), Not__Applicable + OCGT__(small__GT), Not__Applicable + """) + + result = _drop_storage_technologies(new_entrants_summary) + + expected = csv_str_to_df(""" + Technology__Type, REZ__ID + Wind, N3 + Large__scale__Solar__PV, N3 + OCGT__(small__GT), Not__Applicable + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_drop_storage_technologies_empty_input(csv_str_to_df): + # Empty input (all columns, no rows) returns an empty frame, no errors. + new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"]) + + result = _drop_storage_technologies(new_entrants_summary) + + expected = csv_str_to_df(""" + Technology__Type, REZ__ID + """) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + +# --- _rename_summary_columns --- + + +def test_rename_summary_columns(csv_str_to_df): + # The IASR ID, technology and fuel columns are renamed to their schema names; + # other columns (REZ ID) pass through untouched. + gens = csv_str_to_df(""" + IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1 + CNSW__OCGT__Small, OCGT__(small__GT),Gas, NSW__new__OCGT, Not__Applicable + """) + + result = _rename_summary_columns(gens) + + expected = csv_str_to_df(""" + name, technology, fuel_type, fuel_price_mapping, REZ__ID + Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1 + CNSW__OCGT__Small, OCGT__(small__GT),Gas, NSW__new__OCGT, Not__Applicable + """) + pd.testing.assert_frame_equal(result, expected) + + +# --- _set_geo_id --- + + +def test_set_geo_id(csv_str_to_df): + # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal and + # distributed rows ("Not Applicable") fall back to their Sub-region. + gens = csv_str_to_df(""" + technology, REZ__ID, Sub-region + Wind, N3, CNSW + Large__scale__Solar__PV, N0, CNSW + OCGT__(small__GT), Not__Applicable, NQ + Distributed__Resources__Solar,Not__Applicable, SQ + """) + + result = _set_geo_id(gens) + + expected = csv_str_to_df(""" + technology, REZ__ID, Sub-region, geo_id + Wind, N3, CNSW, N3 + Large__scale__Solar__PV, N0, CNSW, N0 + OCGT__(small__GT), Not__Applicable, NQ, NQ + Distributed__Resources__Solar,Not__Applicable, SQ, SQ + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_set_geo_id_empty_input(csv_str_to_df): + # Empty input still returns the geo_id column (all columns, no rows). + gens = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"]) + + result = _set_geo_id(gens) + + expected = csv_str_to_df(""" + technology, REZ__ID, Sub-region, geo_id + """) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + +# --- _add_resource_type --- + + +def test_add_resource_type(csv_str_to_df): + # resource_type is read from the underscore-delimited code in `name`. WH/WM are + # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the + # underscore-free thermal / distributed IDs map to NaN (blank field). + gens = csv_str_to_df(""" + name, technology + Q1_WH_Far__North__QLD, Wind + Q1_WM_Far__North__QLD, Wind + N10_WFX_Hunter__Coast, Wind__-__offshore__(fixed) + DREZ_SAT_Dubbo, Large__scale__Solar__PV + N0_CST_NSW, Solar__Thermal__(16hrs__storage) + CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar + CNSW__OCGT__Small, OCGT__(small__GT) + """) + + result = _add_resource_type(gens) + + expected = csv_str_to_df(""" + name, technology, resource_type + Q1_WH_Far__North__QLD, Wind, wind_high + Q1_WM_Far__North__QLD, Wind, wind_medium + N10_WFX_Hunter__Coast, Wind__-__offshore__(fixed), wind_offshore_fixed + DREZ_SAT_Dubbo, Large__scale__Solar__PV, solar + N0_CST_NSW, Solar__Thermal__(16hrs__storage), solar + CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar, + CNSW__OCGT__Small, OCGT__(small__GT), + """) + pd.testing.assert_frame_equal(result, expected) From 1834fa9b54e1475ee782a446267e2926ef11b61d Mon Sep 17 00:00:00 2001 From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:02:57 +1000 Subject: [PATCH 3/5] add new entrant storage identity templating --- src/ispypsa/templater/create_template.py | 16 +- .../templater/generators_new_entrant.py | 179 ------------- src/ispypsa/templater/new_entrants.py | 242 ++++++++++++++++++ .../schemas/storage_new_entrant.yaml | 8 - .../test_create_ispypsa_inputs_template.py | 82 +++--- .../test_generators_new_entrant.py | 165 ------------ tests/test_templater/test_new_entrants.py | 200 +++++++++++++++ 7 files changed, 488 insertions(+), 404 deletions(-) delete mode 100644 src/ispypsa/templater/generators_new_entrant.py create mode 100644 src/ispypsa/templater/new_entrants.py delete mode 100644 tests/test_templater/test_generators_new_entrant.py create mode 100644 tests/test_templater/test_new_entrants.py diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py index d3e1fe71..67d6260c 100644 --- a/src/ispypsa/templater/create_template.py +++ b/src/ispypsa/templater/create_template.py @@ -21,9 +21,6 @@ _template_sub_regional_flow_path_costs, _template_sub_regional_flow_paths, ) -from ispypsa.templater.generators_new_entrant import ( - _template_generators_new_entrant, -) from ispypsa.templater.geography import _template_network_geography from ispypsa.templater.network_expansion import ( _extract_flow_path_costs_from_iasr, @@ -33,6 +30,10 @@ _filter_flow_path_augmentations_to_granularity, _template_network_expansion, ) +from ispypsa.templater.new_entrants import ( + _template_generators_new_entrant, + _template_storage_new_entrant, +) from ispypsa.templater.nodes import ( _template_regions, _template_sub_regions, @@ -231,14 +232,13 @@ def create_ispypsa_inputs_template( "connection_capacity_non_vre" ].copy() - # Identity columns only for now (name, technology, resource_type, geo_id, - # fuel_type, fuel_price_mapping); cost/property columns are added in later - # PRs. Feeds costs_connection but is not yet a written template output. + # Identity columns only for now - not yet a templater output generators_new_entrant = _template_generators_new_entrant( iasr_tables["new_entrants_summary"] ) - # storage_new_entrant remains defined (empty) for wiring tests - storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"]) + storage_new_entrant = _template_storage_new_entrant( + iasr_tables["new_entrants_summary"] + ) template["costs_connection"] = _template_connection_costs( iasr_tables, scenario, diff --git a/src/ispypsa/templater/generators_new_entrant.py b/src/ispypsa/templater/generators_new_entrant.py deleted file mode 100644 index c0b73e99..00000000 --- a/src/ispypsa/templater/generators_new_entrant.py +++ /dev/null @@ -1,179 +0,0 @@ -# Templates the `generators_new_entrant` table: one row per new entrant generating -# unit, with storage technologies excluded (those are templated separately into the -# storage new-entrants table). See schemas/generators_new_entrant.yaml for the target. -import logging - -import pandas as pd - -from ispypsa.templater.helpers import _where_any_substring_appears - -_IDENTITY_COLUMNS = [ - "name", - "technology", - "resource_type", - "geo_id", - "fuel_type", - "fuel_price_mapping", -] - -_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"] - -# Source (IASR new_entrants_summary) column names → schema output column names. -# The summary's own values are treated as canonical; no cross-table canonicalisation -# is applied here. "IASR ID / DLT names" is an existing unique identifier per row. -_SUMMARY_COLUMN_RENAMES = { - "IASR ID / DLT names": "name", - "Technology Type": "technology", - "Fuel type": "fuel_type", - "Fuel cost mapping": "fuel_price_mapping", -} - -# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a -# mapping for it if/when resource_limits templating requires one. -_RESOURCE_QUALITY_CODE_TO_TYPE = { - "WH": "wind_high", - "WM": "wind_medium", - "WFX": "wind_offshore_fixed", - "WFL": "wind_offshore_floating", - "SAT": "solar", - "CST": "solar", -} - - -# NOTE: partial scope intentional - other columns to be added in next PRs! -def _template_generators_new_entrant( - new_entrants_summary: pd.DataFrame, -) -> pd.DataFrame: - """Templates the new entrant generators identity table from the IASR summary. - - Drops storage, renames the carried-over summary columns to schema names, derives - geo_id (REZ ID or sub-region) and resource_type (from the VRE resource code in - the IASR ID), and returns the identity columns. - - Args: - new_entrants_summary: IASR ``new_entrants_summary`` table. - - Returns: - One row per generating unit with columns ``_IDENTITY_COLUMNS``. - """ - logging.info("Creating a template for new entrant generators") - gens = _drop_storage_technologies(new_entrants_summary) - gens = _rename_summary_columns(gens) - gens = _set_geo_id(gens) - gens = _add_resource_type(gens) - return gens[_IDENTITY_COLUMNS] - - -def _drop_storage_technologies(new_entrants_summary: pd.DataFrame) -> pd.DataFrame: - """Drops storage rows from the new entrants summary, keeping only generators. - - Storage (batteries, distributed batteries, pumped hydro) is templated into the - storage new-entrants table, so it is removed here. Matching is case-insensitive - on the "Technology Type" column (see ``_STORAGE_TECHNOLOGY_STRINGS``). - - I/O Example: - new_entrants_summary: - Technology Type REZ ID - Wind N3 - Large scale Solar PV N3 - Battery Storage (2hrs storage) N3 # storage: dropped - Distributed Resources Batteries Not Applicable # storage: dropped - Pumped Hydro (24hrs storage) Not Applicable # storage: dropped - OCGT (small GT) Not Applicable - - returns: - Technology Type REZ ID - Wind N3 - Large scale Solar PV N3 - OCGT (small GT) Not Applicable - """ - is_storage = _where_any_substring_appears( - new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS - ) - return new_entrants_summary.loc[~is_storage].reset_index(drop=True) - - -def _rename_summary_columns(gens: pd.DataFrame) -> pd.DataFrame: - """Renames the summary's identifier, technology and fuel columns to schema names. - - See ``_SUMMARY_COLUMN_RENAMES``. Other columns (e.g. "REZ ID", "Sub-region", - still needed to derive geo_id) pass through untouched. - - I/O Example: - gens: - IASR ID / DLT names Technology Type Fuel type Fuel cost mapping REZ ID - Q1_WH_Far North QLD Wind Wind Wind Q1 - CNSW OCGT Small OCGT (small GT) Gas NSW new OCGT Not Applicable - - returns: - name technology fuel_type fuel_price_mapping REZ ID - Q1_WH_Far North QLD Wind Wind Wind Q1 - CNSW OCGT Small OCGT (small GT) Gas NSW new OCGT Not Applicable - """ - return gens.rename(columns=_SUMMARY_COLUMN_RENAMES) - - -def _set_geo_id(gens: pd.DataFrame) -> pd.DataFrame: - """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region. - - REZ-located generators (VRE) carry a real "REZ ID"; thermal and distributed - resource rows have "REZ ID" == "Not Applicable" and sit at the sub-region, so - they take their "Sub-region" value instead. Non-REZ IDs (e.g. N0, V0) flow - through unchanged as REZ IDs. - - I/O Example: - gens: - technology REZ ID Sub-region - Wind N3 CNSW - Large scale Solar PV N0 CNSW # Non-REZ: kept as-is - OCGT (small GT) Not Applicable NQ - Distributed Resources Solar Not Applicable SQ - - returns (adds geo_id): - technology REZ ID Sub-region geo_id - Wind N3 CNSW N3 - Large scale Solar PV N0 CNSW N0 - OCGT (small GT) Not Applicable NQ NQ - Distributed Resources Solar Not Applicable SQ SQ - """ - gens = gens.copy() - gens["geo_id"] = gens["REZ ID"].where( - gens["REZ ID"] != "Not Applicable", gens["Sub-region"] - ) - return gens - - -def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame: - """Adds the VRE ``resource_type`` column from the resource code in ``name``. - - VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH" - in "Q1_WH_Far North QLD" (wind high) or "SAT" in "DREZ_SAT_Dubbo" (solar). The - code is extracted and mapped via ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with - no matching code — the underscore-free thermal and distributed-resource rows — - get NaN, meaning no VRE build-limit applies. - - I/O Example: - gens: - name technology - Q1_WH_Far North QLD Wind - Q1_WM_Far North QLD Wind - N10_WFX_Hunter Coast Wind - offshore (fixed) - DREZ_SAT_Dubbo Large scale Solar PV - N0_CST_NSW Solar Thermal (16hrs storage) - CNSW SAT - Distributed Resources Distributed Resources Solar - CNSW OCGT Small OCGT (small GT) - - returns (adds resource_type): - name technology resource_type - Q1_WH_Far North QLD Wind wind_high - Q1_WM_Far North QLD Wind wind_medium - N10_WFX_Hunter Coast Wind - offshore (fixed) wind_offshore_fixed - DREZ_SAT_Dubbo Large scale Solar PV solar - N0_CST_NSW Solar Thermal (16hrs storage) solar # CST -> solar - CNSW SAT - Distributed Resources Distributed Resources Solar NaN # no _ token - CNSW OCGT Small OCGT (small GT) NaN # no _ token - """ - gens = gens.copy() - resource_code = gens["name"].str.extract(r"_(WH|WM|WFX|WFL|SAT|CST)_", expand=False) - gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE) - return gens diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py new file mode 100644 index 00000000..2e5f6e89 --- /dev/null +++ b/src/ispypsa/templater/new_entrants.py @@ -0,0 +1,242 @@ +"""Templates the new entrant generator and storage identity tables. + +Both tables are currently built from a single IASR input, the ``new_entrants_summary`` +table. This module splits that table into its two halves and shapes each into the +identity columns of its target schema (see schemas/generators_new_entrant.yaml and +schemas/storage_new_entrant.yaml). + +There are two independent public orchestrators, one per output table, each taking +the full summary. They share the same shape: + 1. Filter the summary to the relevant technology group — generators or storage + — with _filter_to_technology_group. The storage predicate (battery, pumped + hydro — see _STORAGE_TECHNOLOGY_STRINGS) lives in that one function, so the + two orchestrators can't drift out of sync on what counts as storage. + 2. Rename the carried-over summary columns to their schema names + (_SUMMARY_COLUMN_RENAMES). The summary's own values are treated as + canonical; no cross-table canonicalisation is applied here. + 3. Derive geo_id: REZ-located units (most VRE, REZ-co-located batteries) take + their REZ ID; sub-region-located units (thermal, pumped hydro, distributed + resources) have "REZ ID" == "Not Applicable" and take their Sub-region. + 4. (Generators only) Derive resource_type from the resource-quality code + embedded in the IASR ID — see _add_resource_type. + 5. Select the table's identity columns. _SUMMARY_COLUMN_RENAMES is shared, so + a column it renames that this table does not want (fuel_price_mapping for + storage) simply falls away here. + +Reference detail: + - _SUMMARY_COLUMN_RENAMES: source column -> schema column, shared across both + tables; each table keeps only the renamed columns its identity list needs. + - _STORAGE_TECHNOLOGY_STRINGS: the substrings that mark a row as storage. + - _RESOURCE_QUALITY_CODE_TO_TYPE: VRE resource-quality code -> resource_type. +""" + +import logging + +import pandas as pd + +from ispypsa.templater.helpers import _where_any_substring_appears + +_GENERATOR_IDENTITY_COLUMNS = [ + "name", + "technology", + "resource_type", + "geo_id", + "fuel_type", + "fuel_price_mapping", +] + +_STORAGE_IDENTITY_COLUMNS = [ + "name", + "technology", + "geo_id", + "fuel_type", +] + +_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"] + +# Source (IASR new_entrants_summary) column names → schema output column names. +_SUMMARY_COLUMN_RENAMES = { + "IASR ID / DLT names": "name", + "Technology Type": "technology", + "Fuel type": "fuel_type", + "Fuel cost mapping": "fuel_price_mapping", +} + +# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a +# mapping for it if/when resource_limits templating requires one. +_RESOURCE_QUALITY_CODE_TO_TYPE = { + "WH": "wind_high", + "WM": "wind_medium", + "WFX": "wind_offshore_fixed", + "WFL": "wind_offshore_floating", + "SAT": "solar", + "CST": "solar", +} + +# Extraction pattern for the resource-quality code embedded between underscores in +# a VRE IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". +_RESOURCE_CODE_PATTERN = "_({})_".format( + "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True)) +) + + +# --- public orchestrators --- + + +# NOTE: partial scope intentional - other columns to be added in next PRs! +def _template_generators_new_entrant( + new_entrants_summary: pd.DataFrame, +) -> pd.DataFrame: + """Templates the new entrant generators identity table from the IASR summary. + + Keeps only generator rows, renames the carried-over summary columns to schema + names, derives geo_id (REZ ID or sub-region) and resource_type (from the VRE + resource code in the IASR ID), and returns the identity columns. + + Args: + new_entrants_summary: IASR ``new_entrants_summary`` table. + + Returns: + One row per generating unit with columns ``_GENERATOR_IDENTITY_COLUMNS``. + """ + logging.info("Creating a template for new entrant generators") + gens = _filter_to_technology_group(new_entrants_summary, "generators") + gens = gens.rename(columns=_SUMMARY_COLUMN_RENAMES) + gens = _set_geo_id(gens) + gens = _add_resource_type(gens) + return gens[_GENERATOR_IDENTITY_COLUMNS] + + +# NOTE: partial scope intentional - other columns to be added in next PRs! +def _template_storage_new_entrant( + new_entrants_summary: pd.DataFrame, +) -> pd.DataFrame: + """Templates the new entrant storage identity table from the IASR summary. + + Keeps only storage rows, renames the carried-over summary columns to schema + names, derives geo_id (REZ ID or sub-region), and returns the identity columns. + + Args: + new_entrants_summary: IASR ``new_entrants_summary`` table. + + Returns: + One row per storage unit with columns ``_STORAGE_IDENTITY_COLUMNS``. + """ + logging.info("Creating a template for new entrant storage") + storage = _filter_to_technology_group(new_entrants_summary, "storage") + storage = storage.rename(columns=_SUMMARY_COLUMN_RENAMES) + storage = _set_geo_id(storage) + return storage[_STORAGE_IDENTITY_COLUMNS] + + +# --- shared helpers --- + + +def _filter_to_technology_group( + new_entrants_summary: pd.DataFrame, group: str +) -> pd.DataFrame: + """Returns the summary rows for one technology group: generators or storage. + + Storage rows are those whose "Technology Type" contains a + ``_STORAGE_TECHNOLOGY_STRINGS`` substring (battery, pumped hydro), matched + case-insensitively; generators are every other row. The two groups partition + the summary, so this single predicate is the only place the generator/storage + boundary is defined. + + Args: + new_entrants_summary: the IASR ``new_entrants_summary`` table (any frame + with a "Technology Type" column). + group: "generators" or "storage". + + I/O Example: + new_entrants_summary: + Technology Type REZ ID + Wind N3 + Battery Storage (2hrs storage) N3 + Pumped Hydro (24hrs storage) Not Applicable + OCGT (small GT) Not Applicable + + group="generators" returns: + Technology Type REZ ID + Wind N3 + OCGT (small GT) Not Applicable + + group="storage" returns: + Technology Type REZ ID + Battery Storage (2hrs storage) N3 + Pumped Hydro (24hrs storage) Not Applicable + """ + is_storage = _where_any_substring_appears( + new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS + ) + if group == "storage": + return new_entrants_summary.loc[is_storage].reset_index(drop=True) + if group == "generators": + return new_entrants_summary.loc[~is_storage].reset_index(drop=True) + raise ValueError( + "Filtering new entrants table to technology group: " + f"group must be 'generators' or 'storage', got {group!r}" + ) + + +def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame: + """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region. + + I/O Example: + new_entrants: + technology REZ ID Sub-region + Wind N3 CNSW + Large scale Solar PV N0 CNSW # Non-REZ: kept as-is + OCGT (small GT) Not Applicable NQ + Pumped Hydro (24hrs storage) Not Applicable SNW + + returns (adds geo_id): + technology REZ ID Sub-region geo_id + Wind N3 CNSW N3 + Large scale Solar PV N0 CNSW N0 + OCGT (small GT) Not Applicable NQ NQ + Pumped Hydro (24hrs storage) Not Applicable SNW SNW + """ + new_entrants = new_entrants.copy() + new_entrants["geo_id"] = new_entrants["REZ ID"].where( + new_entrants["REZ ID"] != "Not Applicable", new_entrants["Sub-region"] + ) + return new_entrants + + +# --- generator-specific helpers --- + + +def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame: + """Adds the VRE ``resource_type`` column from the resource code in ``name``. + + VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH" + in "Q1_WH_Far North QLD". The code is extracted and mapped via + ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with no matching code — the underscore- + free thermal and distributed-resource rows — get NaN. + + I/O Example: + gens: + name technology + Q1_WH_Far North QLD Wind + Q1_WM_Far North QLD Wind + N10_WFX_Hunter Coast Wind - offshore (fixed) + DREZ_SAT_Dubbo Large scale Solar PV + N0_CST_NSW Solar Thermal (16hrs storage) + CNSW SAT - Distributed Resources Distributed Resources Solar + CNSW OCGT Small OCGT (small GT) + + returns (adds resource_type): + name technology resource_type + Q1_WH_Far North QLD Wind wind_high + Q1_WM_Far North QLD Wind wind_medium + N10_WFX_Hunter Coast Wind - offshore (fixed) wind_offshore_fixed + DREZ_SAT_Dubbo Large scale Solar PV solar + N0_CST_NSW Solar Thermal (16hrs storage) solar # CST -> solar + CNSW SAT - Distributed Resources Distributed Resources Solar NaN # no _ token + CNSW OCGT Small OCGT (small GT) NaN # no _ token + """ + gens = gens.copy() + resource_code = gens["name"].str.extract(_RESOURCE_CODE_PATTERN, expand=False) + gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE) + return gens diff --git a/src/ispypsa/validation/schemas/storage_new_entrant.yaml b/src/ispypsa/validation/schemas/storage_new_entrant.yaml index 62492bb8..a6009a56 100644 --- a/src/ispypsa/validation/schemas/storage_new_entrant.yaml +++ b/src/ispypsa/validation/schemas/storage_new_entrant.yaml @@ -32,14 +32,6 @@ columns: type: string required: true description: Unique identifier for the storage unit (e.g. IASR ID or full name). - power_station: - type: string - required: true - description: > - Power station name grouping storage units together. - - For new entrant storage units, this is always the same as the `name` field - (i.e, no grouping is performed). Keeping for consistency with existing storage tables. technology: type: string required: true diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py index 0c13d968..28d5400c 100644 --- a/tests/test_templater/test_create_ispypsa_inputs_template.py +++ b/tests/test_templater/test_create_ispypsa_inputs_template.py @@ -215,26 +215,23 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) - # Two VRE generators at Q1 (which has a connection cost forecast) so - # costs_connection produces rows; an OCGT and a storage row check that - # non-VRE pass through and storage is dropped. new_entrants_summary = csv_str_to_df(""" - IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ - Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ - NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ - NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW """) with ( @@ -312,7 +309,6 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): # 3 expansion_ids x 2 years assert len(expansion_costs) == 6 - assert "costs_connection" in result costs_connection = result["costs_connection"] assert set(costs_connection.columns) == { "geo_id", @@ -321,9 +317,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df): "connection_cost", "system_strength_cost", } - assert set(costs_connection["geo_id"]) == {"Q1"} - assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"} - assert len(costs_connection) == 2 + # [(2 VRE) x (1 REZ) + (1 non-VRE) x (2 subregions)] x 2 years + assert set(costs_connection["geo_id"]) == {"Q1", "CNSW", "SNW"} + assert len(costs_connection) == 8 # Custom-constraints tables are spliced into the output via # template.update(template_custom_constraints_from_plexos(...)). The @@ -413,23 +409,23 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) new_entrants_summary = csv_str_to_df(""" - IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ - Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ - NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ - NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW """) with ( @@ -494,6 +490,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): assert set(expansion_costs["expansion_id"]) == {"NSW-QLD", "N3-NSW"} # 2 expansion_ids x 2 years assert len(expansion_costs) == 4 + costs_connection = result["costs_connection"] assert set(costs_connection.columns) == { "geo_id", @@ -502,11 +499,10 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df): "connection_cost", "system_strength_cost", } - # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1 - # still produce two connection-cost rows at nem_regions granularity. - assert set(costs_connection["geo_id"]) == {"Q1"} - assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"} - assert len(costs_connection) == 2 + # REZ geo_ids (Q1) are granularity-invariant, subregions in same region collapse + # [(2 VRE x 1 REZ) + (1 non-VRE x 1 subregion)] x 2 years + assert set(costs_connection["geo_id"]) == {"Q1", "NSW"} + assert len(costs_connection) == 6 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation once sub-regions are collapsed, so the templater @@ -565,23 +561,23 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): Q1, 400 """) connection_cost_forecast_other = csv_str_to_df(""" - Generator Type, Region, Scenario, 2024-25, 2025-26 - Battery Storage (4h), NSW, Step Change, 20000000, 22000000 + Generator Type, Region, Scenario, 2024-25, 2025-26 + OCGT (small GT), NSW, Step Change, 20000000, 22000000 """) connection_capacity_non_vre = csv_str_to_df(""" - Region, Generator Type, Connection capacity (MVA) - NSW, Battery Storage (4h), 400 + Region, Generator Type, Connection capacity (MVA) + NSW, OCGT (small GT), 400 """) efficient_level_of_system_strength_cost = csv_str_to_df(""" label, 2024-25 IBR, 10 """) new_entrants_summary = csv_str_to_df(""" - IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ - Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ - NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ - NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + CNSW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, CNSW + SNW OCGT Small, OCGT (small GT), Gas, NSW new OCGT, Not Applicable, SNW """) with ( @@ -652,11 +648,9 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df): "connection_cost", "system_strength_cost", } - # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1 - # still produce two connection-cost rows at single_region granularity. - assert set(connection_costs["geo_id"]) == {"Q1"} - assert set(connection_costs["technology"]) == {"Wind", "Large scale Solar PV"} - assert len(connection_costs) == 2 + # [(2 VRE x 1 REZ) + (1 non-VRE x NEM)] x 2 years + assert set(connection_costs["geo_id"]) == {"Q1", "NEM"} + assert len(connection_costs) == 6 # Custom constraints from PLEXOS are sub-regional export limits with no # meaningful representation at single_region, so the templater skips them. diff --git a/tests/test_templater/test_generators_new_entrant.py b/tests/test_templater/test_generators_new_entrant.py deleted file mode 100644 index 9973ad33..00000000 --- a/tests/test_templater/test_generators_new_entrant.py +++ /dev/null @@ -1,165 +0,0 @@ -import pandas as pd - -from ispypsa.templater.generators_new_entrant import ( - _IDENTITY_COLUMNS, - _add_resource_type, - _drop_storage_technologies, - _rename_summary_columns, - _set_geo_id, - _template_generators_new_entrant, -) - -# --- _template_generators_new_entrant (orchestrator) --- - - -def test_template_generators_new_entrant(csv_str_to_df): - # Wiring check only (per-helper behaviour is covered above): storage is dropped, - # the identity columns are produced, and one row per surviving generating unit - # is returned. Detailed content is covered by the per-helper tests. - new_entrants_summary = csv_str_to_df(""" - IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID, Sub-region - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1, NQ - Q1_WM_Far__North__QLD, Wind, Wind, Wind, Q1, NQ - Q1_SAT_Far__North__QLD, Large__scale__Solar__PV, Solar, Solar, Q1, NQ - NQ__OCGT__Small, OCGT__(small__GT), Gas, QLD__new__OCGT, Not__Applicable, NQ - NQ__SAT__-__Distributed__Resources,Distributed__Resources__Solar, Solar, Solar, Not__Applicable, NQ - NQ__Battery__2hrs, Battery__Storage__(2hrs__storage), Battery, Battery, Not__Applicable, NQ - """) - - result = _template_generators_new_entrant(new_entrants_summary) - - # storage row dropped -> 5 of 6 rows survive; identity columns produced in order - assert list(result.columns) == _IDENTITY_COLUMNS - assert len(result) == 5 - - -# --- _drop_storage_technologies --- - - -def test_drop_storage_technologies(csv_str_to_df): - # All storage variants (batteries, distributed batteries, pumped hydro) are - # dropped; generation rows pass through unchanged with other columns intact. - new_entrants_summary = csv_str_to_df(""" - Technology__Type, REZ__ID - Wind, N3 - Large__scale__Solar__PV, N3 - Battery__Storage__(2hrs__storage), N3 - Distributed__Resources__Batteries, Not__Applicable - Pumped__Hydro__(24hrs__storage), Not__Applicable - OCGT__(small__GT), Not__Applicable - """) - - result = _drop_storage_technologies(new_entrants_summary) - - expected = csv_str_to_df(""" - Technology__Type, REZ__ID - Wind, N3 - Large__scale__Solar__PV, N3 - OCGT__(small__GT), Not__Applicable - """) - pd.testing.assert_frame_equal(result, expected) - - -def test_drop_storage_technologies_empty_input(csv_str_to_df): - # Empty input (all columns, no rows) returns an empty frame, no errors. - new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"]) - - result = _drop_storage_technologies(new_entrants_summary) - - expected = csv_str_to_df(""" - Technology__Type, REZ__ID - """) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) - - -# --- _rename_summary_columns --- - - -def test_rename_summary_columns(csv_str_to_df): - # The IASR ID, technology and fuel columns are renamed to their schema names; - # other columns (REZ ID) pass through untouched. - gens = csv_str_to_df(""" - IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1 - CNSW__OCGT__Small, OCGT__(small__GT),Gas, NSW__new__OCGT, Not__Applicable - """) - - result = _rename_summary_columns(gens) - - expected = csv_str_to_df(""" - name, technology, fuel_type, fuel_price_mapping, REZ__ID - Q1_WH_Far__North__QLD, Wind, Wind, Wind, Q1 - CNSW__OCGT__Small, OCGT__(small__GT),Gas, NSW__new__OCGT, Not__Applicable - """) - pd.testing.assert_frame_equal(result, expected) - - -# --- _set_geo_id --- - - -def test_set_geo_id(csv_str_to_df): - # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal and - # distributed rows ("Not Applicable") fall back to their Sub-region. - gens = csv_str_to_df(""" - technology, REZ__ID, Sub-region - Wind, N3, CNSW - Large__scale__Solar__PV, N0, CNSW - OCGT__(small__GT), Not__Applicable, NQ - Distributed__Resources__Solar,Not__Applicable, SQ - """) - - result = _set_geo_id(gens) - - expected = csv_str_to_df(""" - technology, REZ__ID, Sub-region, geo_id - Wind, N3, CNSW, N3 - Large__scale__Solar__PV, N0, CNSW, N0 - OCGT__(small__GT), Not__Applicable, NQ, NQ - Distributed__Resources__Solar,Not__Applicable, SQ, SQ - """) - pd.testing.assert_frame_equal(result, expected) - - -def test_set_geo_id_empty_input(csv_str_to_df): - # Empty input still returns the geo_id column (all columns, no rows). - gens = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"]) - - result = _set_geo_id(gens) - - expected = csv_str_to_df(""" - technology, REZ__ID, Sub-region, geo_id - """) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) - - -# --- _add_resource_type --- - - -def test_add_resource_type(csv_str_to_df): - # resource_type is read from the underscore-delimited code in `name`. WH/WM are - # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the - # underscore-free thermal / distributed IDs map to NaN (blank field). - gens = csv_str_to_df(""" - name, technology - Q1_WH_Far__North__QLD, Wind - Q1_WM_Far__North__QLD, Wind - N10_WFX_Hunter__Coast, Wind__-__offshore__(fixed) - DREZ_SAT_Dubbo, Large__scale__Solar__PV - N0_CST_NSW, Solar__Thermal__(16hrs__storage) - CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar - CNSW__OCGT__Small, OCGT__(small__GT) - """) - - result = _add_resource_type(gens) - - expected = csv_str_to_df(""" - name, technology, resource_type - Q1_WH_Far__North__QLD, Wind, wind_high - Q1_WM_Far__North__QLD, Wind, wind_medium - N10_WFX_Hunter__Coast, Wind__-__offshore__(fixed), wind_offshore_fixed - DREZ_SAT_Dubbo, Large__scale__Solar__PV, solar - N0_CST_NSW, Solar__Thermal__(16hrs__storage), solar - CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar, - CNSW__OCGT__Small, OCGT__(small__GT), - """) - pd.testing.assert_frame_equal(result, expected) diff --git a/tests/test_templater/test_new_entrants.py b/tests/test_templater/test_new_entrants.py new file mode 100644 index 00000000..f211f4f2 --- /dev/null +++ b/tests/test_templater/test_new_entrants.py @@ -0,0 +1,200 @@ +import pandas as pd +import pytest + +from ispypsa.templater.new_entrants import ( + _GENERATOR_IDENTITY_COLUMNS, + _STORAGE_IDENTITY_COLUMNS, + _add_resource_type, + _filter_to_technology_group, + _set_geo_id, + _template_generators_new_entrant, + _template_storage_new_entrant, +) + +# --- orchestrators --- + + +def test_template_generators_new_entrant(csv_str_to_df): + # Wiring check only (per-helper behaviour is covered below): storage is dropped, + # the identity columns are produced, and one row per surviving generating unit + # is returned. Detailed content is covered by the per-helper tests. + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_WM_Far North QLD, Wind, Wind, Wind, Q1, NQ + Q1_SAT_Far North QLD, Large scale Solar PV, Solar, Solar, Q1, NQ + NQ OCGT Small, OCGT (small GT), Gas, QLD new OCGT, Not Applicable, NQ + NQ SAT - Distributed Resources, Distributed Resources Solar, Solar, Solar, Not Applicable, NQ + NQ Battery 2hrs, Battery Storage (2hrs storage), Battery, Battery, Not Applicable, NQ + """) + + result = _template_generators_new_entrant(new_entrants_summary) + + # storage row dropped -> 5 of 6 rows survive; identity columns produced in order + assert list(result.columns) == _GENERATOR_IDENTITY_COLUMNS + assert len(result) == 5 + + +def test_template_storage_new_entrant(csv_str_to_df): + # Wiring check only (per-helper behaviour is covered below): generators are + # dropped, the identity columns are produced, and one row per surviving storage + # unit is returned. Detailed content is covered by the per-helper tests. + new_entrants_summary = csv_str_to_df(""" + IASR ID / DLT names, Technology Type, Fuel type, Fuel cost mapping, REZ ID, Sub-region + Q1_WH_Far North QLD, Wind, Wind, Wind, Q1, NQ + NQ OCGT Small, OCGT (small GT), Gas, QLD new OCGT, Not Applicable, NQ + NQ Battery 2hrs, Battery Storage (2hrs storage), Battery, Battery, N3, NQ + NQ Battery - Distributed, Distributed Resources Batteries, Battery, Battery, Not Applicable, NQ + Snowy PH 24hr, Pumped Hydro (24hrs storage), Water, Water, Not Applicable, NQ + """) + + result = _template_storage_new_entrant(new_entrants_summary) + + # generator rows dropped -> 3 of 5 rows survive; identity columns produced in order + assert list(result.columns) == _STORAGE_IDENTITY_COLUMNS + assert len(result) == 3 + + +# --- _filter_to_technology_group --- + + +def test_filter_to_technology_group(csv_str_to_df): + # test core split/filter function returns both groups correctly + new_entrants_summary = csv_str_to_df(""" + Technology Type, REZ ID + Wind, N3 + Large scale Solar PV, N3 + Battery Storage (2hrs storage), N3 + Distributed Resources Batteries, Not Applicable + Pumped Hydro (24hrs storage), Not Applicable + OCGT (small GT), Not Applicable + """) + + # All storage variants (batteries, distributed batteries, pumped hydro) are + # dropped; generation rows pass through unchanged with other columns intact. + generators = _filter_to_technology_group(new_entrants_summary, "generators") + + expected_gens = csv_str_to_df(""" + Technology Type, REZ ID + Wind, N3 + Large scale Solar PV, N3 + OCGT (small GT), Not Applicable + """) + pd.testing.assert_frame_equal(generators, expected_gens) + + # Only storage variants (batteries, distributed batteries, pumped hydro) are + # kept - unchanged, with other columns intact. + storage = _filter_to_technology_group(new_entrants_summary, "storage") + + expected_storage = csv_str_to_df(""" + Technology Type, REZ ID + Battery Storage (2hrs storage), N3 + Distributed Resources Batteries, Not Applicable + Pumped Hydro (24hrs storage), Not Applicable + """) + pd.testing.assert_frame_equal(storage, expected_storage) + + +def test_filter_to_technology_group_raises_unknown_group(csv_str_to_df, caplog): + # Raises on non-permitted 'group' arg (not "generators" or "storage") + new_entrants_summary = csv_str_to_df(""" + Technology Type, REZ ID + Wind, N3 + Pumped Hydro (24hrs storage), Not Applicable + """) + + with pytest.raises(ValueError, match="group must be 'generators' or 'storage'"): + _filter_to_technology_group(new_entrants_summary, "computers") + + +def test_filter_to_technology_group_empty_input(csv_str_to_df): + # Empty input (all columns, no rows) returns an empty frame, no errors. + new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"]) + + result = _filter_to_technology_group(new_entrants_summary, "storage") + + expected = csv_str_to_df(""" + Technology Type, REZ ID + """) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + +# --- _set_geo_id --- + + +def test_set_geo_id(csv_str_to_df): + # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal, pumped hydro + # and distributed rows ("Not Applicable") fall back to their Sub-region. + new_entrants = csv_str_to_df(""" + technology, REZ ID, Sub-region + Wind, N3, CNSW + Large scale Solar PV, N0, CNSW + OCGT (small GT), Not Applicable, NQ + Pumped Hydro (24hrs storage), Not Applicable, SNW + """) + + result = _set_geo_id(new_entrants) + + expected = csv_str_to_df(""" + technology, REZ ID, Sub-region, geo_id + Wind, N3, CNSW, N3 + Large scale Solar PV, N0, CNSW, N0 + OCGT (small GT), Not Applicable, NQ, NQ + Pumped Hydro (24hrs storage), Not Applicable, SNW, SNW + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_set_geo_id_empty_input(csv_str_to_df): + # Empty input still returns the geo_id column (all columns, no rows). + new_entrants = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"]) + + result = _set_geo_id(new_entrants) + + expected = csv_str_to_df(""" + technology, REZ ID, Sub-region, geo_id + """) + pd.testing.assert_frame_equal(result, expected, check_dtype=False) + + +# --- _add_resource_type (generator-specific) --- + + +def test_add_resource_type(csv_str_to_df): + # resource_type is read from the underscore-delimited code in `name`. WH/WM are + # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the + # underscore-free thermal / distributed IDs map to NaN (blank field). + gens = csv_str_to_df(""" + name, technology + Q1_WH_Far North QLD, Wind + Q1_WM_Far North QLD, Wind + N10_WFX_Hunter Coast, Wind - offshore (fixed) + DREZ_SAT_Dubbo, Large scale Solar PV + N0_CST_NSW, Solar Thermal (16hrs storage) + CNSW SAT - Distributed Resources, Distributed Resources Solar + CNSW OCGT Small, OCGT (small GT) + """) + + result = _add_resource_type(gens) + + expected = csv_str_to_df(""" + name, technology, resource_type + Q1_WH_Far North QLD, Wind, wind_high + Q1_WM_Far North QLD, Wind, wind_medium + N10_WFX_Hunter Coast, Wind - offshore (fixed), wind_offshore_fixed + DREZ_SAT_Dubbo, Large scale Solar PV, solar + N0_CST_NSW, Solar Thermal (16hrs storage), solar + CNSW SAT - Distributed Resources, Distributed Resources Solar, + CNSW OCGT Small, OCGT (small GT), + """) + pd.testing.assert_frame_equal(result, expected) + + +def test_add_resource_type_empty_input(): + # test empty input still returns the input df columns + resource_type column + empty_input = pd.DataFrame(columns=["name", "technology"]) + + result = _add_resource_type(empty_input) + + expected = pd.DataFrame(columns=["name", "technology", "resource_type"]) + pd.testing.assert_frame_equal(result, expected) From e7a097ebfa812a3d5e01b3656e1024aaafd10c63 Mon Sep 17 00:00:00 2001 From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com> Date: Fri, 19 Jun 2026 14:38:45 +1000 Subject: [PATCH 4/5] clean up docstrings and add extra i/o examples --- src/ispypsa/templater/new_entrants.py | 53 ++++++++++++--------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py index 2e5f6e89..188217ea 100644 --- a/src/ispypsa/templater/new_entrants.py +++ b/src/ispypsa/templater/new_entrants.py @@ -1,33 +1,17 @@ """Templates the new entrant generator and storage identity tables. Both tables are currently built from a single IASR input, the ``new_entrants_summary`` -table. This module splits that table into its two halves and shapes each into the +table. This module splits that table into its two subsets and shapes each into the identity columns of its target schema (see schemas/generators_new_entrant.yaml and schemas/storage_new_entrant.yaml). There are two independent public orchestrators, one per output table, each taking the full summary. They share the same shape: - 1. Filter the summary to the relevant technology group — generators or storage - — with _filter_to_technology_group. The storage predicate (battery, pumped - hydro — see _STORAGE_TECHNOLOGY_STRINGS) lives in that one function, so the - two orchestrators can't drift out of sync on what counts as storage. + 1. Filter the summary to the relevant technology group 2. Rename the carried-over summary columns to their schema names - (_SUMMARY_COLUMN_RENAMES). The summary's own values are treated as - canonical; no cross-table canonicalisation is applied here. - 3. Derive geo_id: REZ-located units (most VRE, REZ-co-located batteries) take - their REZ ID; sub-region-located units (thermal, pumped hydro, distributed - resources) have "REZ ID" == "Not Applicable" and take their Sub-region. - 4. (Generators only) Derive resource_type from the resource-quality code - embedded in the IASR ID — see _add_resource_type. - 5. Select the table's identity columns. _SUMMARY_COLUMN_RENAMES is shared, so - a column it renames that this table does not want (fuel_price_mapping for - storage) simply falls away here. - -Reference detail: - - _SUMMARY_COLUMN_RENAMES: source column -> schema column, shared across both - tables; each table keeps only the renamed columns its identity list needs. - - _STORAGE_TECHNOLOGY_STRINGS: the substrings that mark a row as storage. - - _RESOURCE_QUALITY_CODE_TO_TYPE: VRE resource-quality code -> resource_type. + 3. Derive geo_id + 4. (Generators only) Derive resource_type + 5. Select the table's group-specific identity columns. """ import logging @@ -93,11 +77,18 @@ def _template_generators_new_entrant( names, derives geo_id (REZ ID or sub-region) and resource_type (from the VRE resource code in the IASR ID), and returns the identity columns. - Args: - new_entrants_summary: IASR ``new_entrants_summary`` table. + I/O Example: + new_entrants_summary (abbr.): + IASR ID Power Station Technology Type REZ ID Sub-region Fuel type Fuel cost mapping + N3_WH_rez N3_WH_rez Wind N3 NNSW Wind Wind + N3 Battery N3 Battery Battery (2hrs) N3 NNSW Battery Battery + SQ CCGT SQ CCGT CCGT Not Applicable SQ Gas QLD new CCGT Returns: - One row per generating unit with columns ``_GENERATOR_IDENTITY_COLUMNS``. + name technology resource_type geo_id fuel_type fuel_price_mapping + N3_WH_rez Wind wind_high N3 Wind Wind + SQ CCGT CCGT SQ Gas QLD new CCGT + """ logging.info("Creating a template for new entrant generators") gens = _filter_to_technology_group(new_entrants_summary, "generators") @@ -116,11 +107,16 @@ def _template_storage_new_entrant( Keeps only storage rows, renames the carried-over summary columns to schema names, derives geo_id (REZ ID or sub-region), and returns the identity columns. - Args: - new_entrants_summary: IASR ``new_entrants_summary`` table. + I/O Example: + new_entrants_summary (abbr.): + IASR ID Power Station Technology Type REZ ID Sub-region Fuel type Fuel cost mapping + N3_WH_rez N3_WH_rez Wind N3 NNSW Wind Wind + N3 Battery N3 Battery Battery (2hrs) N3 NNSW Battery Battery + SQ CCGT SQ CCGT CCGT Not Applicable SQ Gas QLD new CCGT Returns: - One row per storage unit with columns ``_STORAGE_IDENTITY_COLUMNS``. + name technology geo_id fuel_type + N3 Battery Battery (2hrs) N3 Battery """ logging.info("Creating a template for new entrant storage") storage = _filter_to_technology_group(new_entrants_summary, "storage") @@ -144,8 +140,7 @@ def _filter_to_technology_group( boundary is defined. Args: - new_entrants_summary: the IASR ``new_entrants_summary`` table (any frame - with a "Technology Type" column). + new_entrants_summary: the IASR ``new_entrants_summary`` table group: "generators" or "storage". I/O Example: From aba1ad028e74fdae27c84b5966e3f40f92fa5482 Mon Sep 17 00:00:00 2001 From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com> Date: Wed, 24 Jun 2026 17:11:26 +1000 Subject: [PATCH 5/5] pull out common helpers to set geo_id and check battery and storage rows --- .../custom_constraints_from_plexos.py | 25 +----- src/ispypsa/templater/helpers.py | 45 ++++++++++ src/ispypsa/templater/new_entrants.py | 84 +++---------------- .../test_custom_constraints_from_plexos.py | 38 --------- tests/test_templater/test_helpers.py | 79 +++++++++++++++++ tests/test_templater/test_new_entrants.py | 75 +---------------- 6 files changed, 140 insertions(+), 206 deletions(-) diff --git a/src/ispypsa/templater/custom_constraints_from_plexos.py b/src/ispypsa/templater/custom_constraints_from_plexos.py index c2a4672e..68c35fbe 100644 --- a/src/ispypsa/templater/custom_constraints_from_plexos.py +++ b/src/ispypsa/templater/custom_constraints_from_plexos.py @@ -176,6 +176,7 @@ import pandas as pd +from .helpers import _is_battery_row, _pick_location from .mappings import _CANONICAL_TIMESLICES # PLEXOS REZ-id prefixes that IASR renamed to DREZ. Applied to the first @@ -924,30 +925,6 @@ def _battery_to_location(new_entrants: pd.DataFrame) -> dict[str, str]: return dict(zip(batteries["IASR ID / DLT names"], locations)) -def _is_battery_row(new_entrants: pd.DataFrame) -> pd.Series: - """Boolean mask selecting battery rows in new_entrants_summary. - - Matches any Technology Type that contains the literal substring - "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular) - and "Distributed Resources Batteries" (plural). Other storage - technologies (pumped hydro, solar thermal) intentionally do not match. - """ - return new_entrants["Technology Type"].str.contains("Batter", na=False) - - -def _pick_location(row: pd.Series) -> str: - """Return REZ ID when populated, otherwise Sub-region. - - I/O Example: - {"REZ ID": "Q8", "Sub-region": "SQ"} -> "Q8" - {"REZ ID": "Not Applicable", "Sub-region": "SQ"} -> "SQ" - """ - rez_id = row["REZ ID"] - if pd.notna(rez_id) and rez_id != "Not Applicable": - return rez_id - return row["Sub-region"] - - def _triggered_locations_per_constraint( lhs: pd.DataFrame, unit_to_location: dict[str, str] ) -> pd.DataFrame: diff --git a/src/ispypsa/templater/helpers.py b/src/ispypsa/templater/helpers.py index eb229e63..da9456b5 100644 --- a/src/ispypsa/templater/helpers.py +++ b/src/ispypsa/templater/helpers.py @@ -384,6 +384,51 @@ def _strip_all_text_after_numeric_value( return series +def _pick_location(row: pd.Series) -> str: + """Return a technology's REZ ID when populated, otherwise Sub-region. + + I/O Example: + {"REZ ID": "Q8", "Sub-region": "SQ"} -> "Q8" + {"REZ ID": "Not Applicable", "Sub-region": "SQ"} -> "SQ" + """ + rez_id = row["REZ ID"] + if pd.notna(rez_id) and rez_id != "Not Applicable": + return rez_id + return row["Sub-region"] + + +def _is_battery_row( + df: pd.DataFrame, col_to_check: str = "Technology Type" +) -> pd.Series: + """Boolean mask selecting battery technology rows in ``df``. + + Matches any ``col_to_check`` row that contains the literal substring + "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular) + and "Distributed Resources Batteries" (plural). Other storage + technologies (pumped hydro, solar thermal) intentionally do not match. + """ + return df[col_to_check].str.contains("Batter", na=False) + + +def _is_pumped_hydro_row( + df: pd.DataFrame, col_to_check: str = "Technology Type" +) -> pd.Series: + """Boolean mask selecting pumped hydro technology rows in ``df``. + + Matches any ``col_to_check`` row that contains the literal substring + "Pumped Hydro" -- covering all durations. Other storage technologies + (batteries, solar thermal) intentionally do not match. + """ + return df[col_to_check].str.contains("Pumped Hydro", na=False) + + +def _is_storage_row( + df: pd.DataFrame, col_to_check: str = "Technology Type" +) -> pd.Series: + """Wrapper that returns union of ``_is_battery_row`` and ``_is_pumped_hydro_row``.""" + return _is_battery_row(df, col_to_check) | _is_pumped_hydro_row(df, col_to_check) + + def _standardise_storage_capitalisation(series: pd.Series) -> pd.Series: """ Standardises capitalisation of "storage" in a pandas Series. diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py index 188217ea..5f06d2ef 100644 --- a/src/ispypsa/templater/new_entrants.py +++ b/src/ispypsa/templater/new_entrants.py @@ -18,7 +18,10 @@ import pandas as pd -from ispypsa.templater.helpers import _where_any_substring_appears +from ispypsa.templater.helpers import ( + _is_storage_row, + _pick_location, +) _GENERATOR_IDENTITY_COLUMNS = [ "name", @@ -36,8 +39,6 @@ "fuel_type", ] -_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"] - # Source (IASR new_entrants_summary) column names → schema output column names. _SUMMARY_COLUMN_RENAMES = { "IASR ID / DLT names": "name", @@ -57,8 +58,8 @@ "CST": "solar", } -# Extraction pattern for the resource-quality code embedded between underscores in -# a VRE IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". +# Regex extracting the resource-quality code embedded between underscores in a VRE # IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". Derived from the code map, it +# expands to "_(WFX|WFL|SAT|...)_" — one capture group over the known codes # sorted longest-first so a short code can't shadow a longer one it prefixes. _RESOURCE_CODE_PATTERN = "_({})_".format( "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True)) ) @@ -91,7 +92,7 @@ def _template_generators_new_entrant( """ logging.info("Creating a template for new entrant generators") - gens = _filter_to_technology_group(new_entrants_summary, "generators") + gens = new_entrants_summary[~_is_storage_row(new_entrants_summary)].copy() gens = gens.rename(columns=_SUMMARY_COLUMN_RENAMES) gens = _set_geo_id(gens) gens = _add_resource_type(gens) @@ -119,7 +120,7 @@ def _template_storage_new_entrant( N3 Battery Battery (2hrs) N3 Battery """ logging.info("Creating a template for new entrant storage") - storage = _filter_to_technology_group(new_entrants_summary, "storage") + storage = new_entrants_summary[_is_storage_row(new_entrants_summary)].copy() storage = storage.rename(columns=_SUMMARY_COLUMN_RENAMES) storage = _set_geo_id(storage) return storage[_STORAGE_IDENTITY_COLUMNS] @@ -128,74 +129,13 @@ def _template_storage_new_entrant( # --- shared helpers --- -def _filter_to_technology_group( - new_entrants_summary: pd.DataFrame, group: str -) -> pd.DataFrame: - """Returns the summary rows for one technology group: generators or storage. - - Storage rows are those whose "Technology Type" contains a - ``_STORAGE_TECHNOLOGY_STRINGS`` substring (battery, pumped hydro), matched - case-insensitively; generators are every other row. The two groups partition - the summary, so this single predicate is the only place the generator/storage - boundary is defined. - - Args: - new_entrants_summary: the IASR ``new_entrants_summary`` table - group: "generators" or "storage". - - I/O Example: - new_entrants_summary: - Technology Type REZ ID - Wind N3 - Battery Storage (2hrs storage) N3 - Pumped Hydro (24hrs storage) Not Applicable - OCGT (small GT) Not Applicable - - group="generators" returns: - Technology Type REZ ID - Wind N3 - OCGT (small GT) Not Applicable - - group="storage" returns: - Technology Type REZ ID - Battery Storage (2hrs storage) N3 - Pumped Hydro (24hrs storage) Not Applicable - """ - is_storage = _where_any_substring_appears( - new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS - ) - if group == "storage": - return new_entrants_summary.loc[is_storage].reset_index(drop=True) - if group == "generators": - return new_entrants_summary.loc[~is_storage].reset_index(drop=True) - raise ValueError( - "Filtering new entrants table to technology group: " - f"group must be 'generators' or 'storage', got {group!r}" - ) - - def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame: - """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region. + """Adds 'geo_id' column to new_entrants containing REZ ID with Sub-region fallback. - I/O Example: - new_entrants: - technology REZ ID Sub-region - Wind N3 CNSW - Large scale Solar PV N0 CNSW # Non-REZ: kept as-is - OCGT (small GT) Not Applicable NQ - Pumped Hydro (24hrs storage) Not Applicable SNW - - returns (adds geo_id): - technology REZ ID Sub-region geo_id - Wind N3 CNSW N3 - Large scale Solar PV N0 CNSW N0 - OCGT (small GT) Not Applicable NQ NQ - Pumped Hydro (24hrs storage) Not Applicable SNW SNW + Applies ``_pick_location`` helper to each row of the new_entrants table to + set their 'geo_id'. Simple wrapper for readability. """ - new_entrants = new_entrants.copy() - new_entrants["geo_id"] = new_entrants["REZ ID"].where( - new_entrants["REZ ID"] != "Not Applicable", new_entrants["Sub-region"] - ) + new_entrants["geo_id"] = new_entrants.apply(_pick_location, axis=1) return new_entrants diff --git a/tests/test_templater/test_custom_constraints_from_plexos.py b/tests/test_templater/test_custom_constraints_from_plexos.py index 2c3ae7a9..ce377382 100644 --- a/tests/test_templater/test_custom_constraints_from_plexos.py +++ b/tests/test_templater/test_custom_constraints_from_plexos.py @@ -31,12 +31,10 @@ _generator_to_location, _iasr_id_choices, _inject_iasr_new_entrant_batteries, - _is_battery_row, _line_variable_name, _location_battery_pairs, _log_injected_batteries, _match_unit_name, - _pick_location, _plexos_extract_dir, _rename_battery_name, _rename_first_token, @@ -830,42 +828,6 @@ def test_build_custom_constraints_rhs_maps_to_region_prefixed_canonical_timeslic pd.testing.assert_frame_equal(result, expected) -# --- _is_battery_row --- - - -def test_is_battery_row(csv_str_to_df): - new_entrants = csv_str_to_df(""" - IASR ID / DLT names, Technology Type - Q1 Battery - 2h, Battery Storage (2hrs storage) - NQ Battery - Dist, Distributed Resources Batteries - Q1 Wind, Wind - N1 Pumped Hydro - 24h,Pumped Hydro (24hrs storage) - Q1 Solar Thermal, Solar Thermal (16hrs storage) - """) - - result = _is_battery_row(new_entrants) - - # Battery + Distributed Resources Batteries match; others (incl. pumped - # hydro and solar thermal storage) do not. - assert list(result) == [True, True, False, False, False] - - -# --- _pick_location --- - - -@pytest.mark.parametrize( - "rez_id, sub_region, expected", - [ - ("Q8", "SQ", "Q8"), # REZ ID populated -> REZ ID - ("Not Applicable", "SQ", "SQ"), # 'Not Applicable' -> Sub-region - (None, "SQ", "SQ"), # NaN/None -> Sub-region - ], -) -def test_pick_location(rez_id, sub_region, expected): - row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region}) - assert _pick_location(row) == expected - - # --- _generator_to_location --- diff --git a/tests/test_templater/test_helpers.py b/tests/test_templater/test_helpers.py index 2696aa3c..6d474501 100644 --- a/tests/test_templater/test_helpers.py +++ b/tests/test_templater/test_helpers.py @@ -2,8 +2,12 @@ import pytest from ispypsa.templater.helpers import ( + _is_battery_row, + _is_pumped_hydro_row, + _is_storage_row, _looks_like_financial_year, _manual_remove_footnotes_from_generator_names, + _pick_location, _rez_name_to_id_mapping, _snakecase_string, _standardise_storage_capitalisation, @@ -373,3 +377,78 @@ def test_looks_like_financial_year_matches_only_canonical_formats(): assert _looks_like_financial_year("24-25") is False assert _looks_like_financial_year("Status") is False assert _looks_like_financial_year("Flow path") is False + + +# --- _pick_location --- + + +@pytest.mark.parametrize( + "rez_id, sub_region, expected", + [ + ("Q8", "SQ", "Q8"), # REZ ID populated -> REZ ID + ("Not Applicable", "SQ", "SQ"), # 'Not Applicable' -> Sub-region + (None, "SQ", "SQ"), # NaN/None -> Sub-region + ], +) +def test_pick_location(rez_id, sub_region, expected): + row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region}) + assert _pick_location(row) == expected + + +# --- _is_battery_row --- + + +def test_is_battery_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology Type + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_battery_row(new_entrants) + + # Battery + Distributed Resources Batteries match; others (incl. pumped + # hydro and solar thermal storage) do not. + assert list(result) == [True, True, False, False, False] + + +# --- _is_pumped_hydro_row --- + + +def test_is_pumped_hydro_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology Type + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_pumped_hydro_row(new_entrants) + + # Pumped Hydro resources match; Batter* and other storage do not. + assert list(result) == [False, False, False, True, False] + + +# --- _is_storage_row --- + + +def test_is_storage_row(csv_str_to_df): + new_entrants = csv_str_to_df(""" + IASR ID / DLT names, Technology + Q1 Battery - 2h, Battery Storage (2hrs storage) + NQ Battery - Dist, Distributed Resources Batteries + Q1 Wind, Wind + N1 Pumped Hydro - 24h, Pumped Hydro (24hrs storage) + Q1 Solar Thermal, Solar Thermal (16hrs storage) + """) + + result = _is_storage_row(new_entrants, col_to_check="Technology") + + # Battery, Distributed Resources Batteries and Pumped Hydro all match. + # Solar thermal still does not. + assert list(result) == [True, True, False, True, False] diff --git a/tests/test_templater/test_new_entrants.py b/tests/test_templater/test_new_entrants.py index f211f4f2..2b32d386 100644 --- a/tests/test_templater/test_new_entrants.py +++ b/tests/test_templater/test_new_entrants.py @@ -5,7 +5,6 @@ _GENERATOR_IDENTITY_COLUMNS, _STORAGE_IDENTITY_COLUMNS, _add_resource_type, - _filter_to_technology_group, _set_geo_id, _template_generators_new_entrant, _template_storage_new_entrant, @@ -55,82 +54,16 @@ def test_template_storage_new_entrant(csv_str_to_df): assert len(result) == 3 -# --- _filter_to_technology_group --- - - -def test_filter_to_technology_group(csv_str_to_df): - # test core split/filter function returns both groups correctly - new_entrants_summary = csv_str_to_df(""" - Technology Type, REZ ID - Wind, N3 - Large scale Solar PV, N3 - Battery Storage (2hrs storage), N3 - Distributed Resources Batteries, Not Applicable - Pumped Hydro (24hrs storage), Not Applicable - OCGT (small GT), Not Applicable - """) - - # All storage variants (batteries, distributed batteries, pumped hydro) are - # dropped; generation rows pass through unchanged with other columns intact. - generators = _filter_to_technology_group(new_entrants_summary, "generators") - - expected_gens = csv_str_to_df(""" - Technology Type, REZ ID - Wind, N3 - Large scale Solar PV, N3 - OCGT (small GT), Not Applicable - """) - pd.testing.assert_frame_equal(generators, expected_gens) - - # Only storage variants (batteries, distributed batteries, pumped hydro) are - # kept - unchanged, with other columns intact. - storage = _filter_to_technology_group(new_entrants_summary, "storage") - - expected_storage = csv_str_to_df(""" - Technology Type, REZ ID - Battery Storage (2hrs storage), N3 - Distributed Resources Batteries, Not Applicable - Pumped Hydro (24hrs storage), Not Applicable - """) - pd.testing.assert_frame_equal(storage, expected_storage) - - -def test_filter_to_technology_group_raises_unknown_group(csv_str_to_df, caplog): - # Raises on non-permitted 'group' arg (not "generators" or "storage") - new_entrants_summary = csv_str_to_df(""" - Technology Type, REZ ID - Wind, N3 - Pumped Hydro (24hrs storage), Not Applicable - """) - - with pytest.raises(ValueError, match="group must be 'generators' or 'storage'"): - _filter_to_technology_group(new_entrants_summary, "computers") - - -def test_filter_to_technology_group_empty_input(csv_str_to_df): - # Empty input (all columns, no rows) returns an empty frame, no errors. - new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"]) - - result = _filter_to_technology_group(new_entrants_summary, "storage") - - expected = csv_str_to_df(""" - Technology Type, REZ ID - """) - pd.testing.assert_frame_equal(result, expected, check_dtype=False) - - # --- _set_geo_id --- def test_set_geo_id(csv_str_to_df): - # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal, pumped hydro - # and distributed rows ("Not Applicable") fall back to their Sub-region. + # Check that the wrapper adds 'geo_id' column, correctly applying ``_pick_location`` + # and not impacting existing columns. new_entrants = csv_str_to_df(""" technology, REZ ID, Sub-region Wind, N3, CNSW - Large scale Solar PV, N0, CNSW OCGT (small GT), Not Applicable, NQ - Pumped Hydro (24hrs storage), Not Applicable, SNW """) result = _set_geo_id(new_entrants) @@ -138,15 +71,13 @@ def test_set_geo_id(csv_str_to_df): expected = csv_str_to_df(""" technology, REZ ID, Sub-region, geo_id Wind, N3, CNSW, N3 - Large scale Solar PV, N0, CNSW, N0 OCGT (small GT), Not Applicable, NQ, NQ - Pumped Hydro (24hrs storage), Not Applicable, SNW, SNW """) pd.testing.assert_frame_equal(result, expected) def test_set_geo_id_empty_input(csv_str_to_df): - # Empty input still returns the geo_id column (all columns, no rows). + # Empty input still returns the added geo_id column new_entrants = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"]) result = _set_geo_id(new_entrants)