From 8fd519f6adadd45725686051bfa964a085ebf25b Mon Sep 17 00:00:00 2001
From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com>
Date: Thu, 11 Jun 2026 11:16:59 +1000
Subject: [PATCH 1/5] add CLAUDE.local.md to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 569bc5a5..c2782d11 100644
--- a/.gitignore
+++ b/.gitignore
@@ -191,6 +191,7 @@ notes/
 .DS_Store
 # ignore claude stuff
 .claude/settings.local.json
+CLAUDE.local.md
 
 # ignore all pypsa output files
 *.nc

From d4c381095a82659817560f829d834676a956ae58 Mon Sep 17 00:00:00 2001
From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com>
Date: Thu, 11 Jun 2026 11:31:30 +1000
Subject: [PATCH 2/5] add identity templating for new entrant generators

---
 src/ispypsa/templater/create_template.py      |  15 +-
 .../templater/generators_new_entrant.py       | 179 ++++++++++++++++++
 .../test_local_cache.py                       |   2 +
 .../test_create_ispypsa_inputs_template.py    |  47 ++++-
 .../test_generators_new_entrant.py            | 165 ++++++++++++++++
 5 files changed, 397 insertions(+), 11 deletions(-)
 create mode 100644 src/ispypsa/templater/generators_new_entrant.py
 create mode 100644 tests/test_templater/test_generators_new_entrant.py

diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py
index bf26486a..d3e1fe71 100644
--- a/src/ispypsa/templater/create_template.py
+++ b/src/ispypsa/templater/create_template.py
@@ -21,6 +21,9 @@
     _template_sub_regional_flow_path_costs,
     _template_sub_regional_flow_paths,
 )
+from ispypsa.templater.generators_new_entrant import (
+    _template_generators_new_entrant,
+)
 from ispypsa.templater.geography import _template_network_geography
 from ispypsa.templater.network_expansion import (
     _extract_flow_path_costs_from_iasr,
@@ -220,10 +223,6 @@ def create_ispypsa_inputs_template(
         template["network_expansion_options"] = expansion_options
         template["network_transmission_path_expansion_costs"] = expansion_costs
 
-        # todo: replace with actual generators_new_entrant once that templating
-        # function is written — passing empty placeholder for now so costs_connection
-        # is wired up but produces no VRE rows until generators are templated.
-
         # connection_capacity_non_vre is in manually_extracted_template_tables/ (sourced from
         # ENOR tables 16-17 and confirmed with AEMO) but is needed as an iasr_tables input,
         # not a template output. TODO revisit when more manual tables added and consider
@@ -232,7 +231,13 @@ def create_ispypsa_inputs_template(
             "connection_capacity_non_vre"
         ].copy()
 
-        generators_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
+        # Identity columns only for now (name, technology, resource_type, geo_id,
+        # fuel_type, fuel_price_mapping); cost/property columns are added in later
+        # PRs. Feeds costs_connection but is not yet a written template output.
+        generators_new_entrant = _template_generators_new_entrant(
+            iasr_tables["new_entrants_summary"]
+        )
+        # storage_new_entrant remains defined (empty) for wiring tests
         storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
         template["costs_connection"] = _template_connection_costs(
             iasr_tables,
diff --git a/src/ispypsa/templater/generators_new_entrant.py b/src/ispypsa/templater/generators_new_entrant.py
new file mode 100644
index 00000000..c0b73e99
--- /dev/null
+++ b/src/ispypsa/templater/generators_new_entrant.py
@@ -0,0 +1,179 @@
+# Templates the `generators_new_entrant` table: one row per new entrant generating
+# unit, with storage technologies excluded (those are templated separately into the
+# storage new-entrants table). See schemas/generators_new_entrant.yaml for the target.
+import logging
+
+import pandas as pd
+
+from ispypsa.templater.helpers import _where_any_substring_appears
+
+_IDENTITY_COLUMNS = [
+    "name",
+    "technology",
+    "resource_type",
+    "geo_id",
+    "fuel_type",
+    "fuel_price_mapping",
+]
+
+_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"]
+
+# Source (IASR new_entrants_summary) column names → schema output column names.
+# The summary's own values are treated as canonical; no cross-table canonicalisation
+# is applied here. "IASR ID / DLT names" is an existing unique identifier per row.
+_SUMMARY_COLUMN_RENAMES = {
+    "IASR ID / DLT names": "name",
+    "Technology Type": "technology",
+    "Fuel type": "fuel_type",
+    "Fuel cost mapping": "fuel_price_mapping",
+}
+
+# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
+# mapping for it if/when resource_limits templating requires one.
+_RESOURCE_QUALITY_CODE_TO_TYPE = {
+    "WH": "wind_high",
+    "WM": "wind_medium",
+    "WFX": "wind_offshore_fixed",
+    "WFL": "wind_offshore_floating",
+    "SAT": "solar",
+    "CST": "solar",
+}
+
+
+# NOTE: partial scope intentional - other columns to be added in next PRs!
+def _template_generators_new_entrant(
+    new_entrants_summary: pd.DataFrame,
+) -> pd.DataFrame:
+    """Templates the new entrant generators identity table from the IASR summary.
+
+    Drops storage, renames the carried-over summary columns to schema names, derives
+    geo_id (REZ ID or sub-region) and resource_type (from the VRE resource code in
+    the IASR ID), and returns the identity columns.
+
+    Args:
+        new_entrants_summary: IASR ``new_entrants_summary`` table.
+
+    Returns:
+        One row per generating unit with columns ``_IDENTITY_COLUMNS``.
+    """
+    logging.info("Creating a template for new entrant generators")
+    gens = _drop_storage_technologies(new_entrants_summary)
+    gens = _rename_summary_columns(gens)
+    gens = _set_geo_id(gens)
+    gens = _add_resource_type(gens)
+    return gens[_IDENTITY_COLUMNS]
+
+
+def _drop_storage_technologies(new_entrants_summary: pd.DataFrame) -> pd.DataFrame:
+    """Drops storage rows from the new entrants summary, keeping only generators.
+
+    Storage (batteries, distributed batteries, pumped hydro) is templated into the
+    storage new-entrants table, so it is removed here. Matching is case-insensitive
+    on the "Technology Type" column (see ``_STORAGE_TECHNOLOGY_STRINGS``).
+
+    I/O Example:
+        new_entrants_summary:
+            Technology Type                  REZ ID
+            Wind                             N3
+            Large scale Solar PV             N3
+            Battery Storage (2hrs storage)   N3       # storage: dropped
+            Distributed Resources Batteries  Not Applicable  # storage: dropped
+            Pumped Hydro (24hrs storage)     Not Applicable  # storage: dropped
+            OCGT (small GT)                  Not Applicable
+
+        returns:
+            Technology Type                  REZ ID
+            Wind                             N3
+            Large scale Solar PV             N3
+            OCGT (small GT)                  Not Applicable
+    """
+    is_storage = _where_any_substring_appears(
+        new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS
+    )
+    return new_entrants_summary.loc[~is_storage].reset_index(drop=True)
+
+
+def _rename_summary_columns(gens: pd.DataFrame) -> pd.DataFrame:
+    """Renames the summary's identifier, technology and fuel columns to schema names.
+
+    See ``_SUMMARY_COLUMN_RENAMES``. Other columns (e.g. "REZ ID", "Sub-region",
+    still needed to derive geo_id) pass through untouched.
+
+    I/O Example:
+        gens:
+            IASR ID / DLT names   Technology Type   Fuel type   Fuel cost mapping   REZ ID
+            Q1_WH_Far North QLD   Wind              Wind        Wind                Q1
+            CNSW OCGT Small       OCGT (small GT)   Gas         NSW new OCGT        Not Applicable
+
+        returns:
+            name                  technology        fuel_type   fuel_price_mapping  REZ ID
+            Q1_WH_Far North QLD   Wind              Wind        Wind                Q1
+            CNSW OCGT Small       OCGT (small GT)   Gas         NSW new OCGT        Not Applicable
+    """
+    return gens.rename(columns=_SUMMARY_COLUMN_RENAMES)
+
+
+def _set_geo_id(gens: pd.DataFrame) -> pd.DataFrame:
+    """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region.
+
+    REZ-located generators (VRE) carry a real "REZ ID"; thermal and distributed
+    resource rows have "REZ ID" == "Not Applicable" and sit at the sub-region, so
+    they take their "Sub-region" value instead. Non-REZ IDs (e.g. N0, V0) flow
+    through unchanged as REZ IDs.
+
+    I/O Example:
+        gens:
+            technology             REZ ID           Sub-region
+            Wind                   N3               CNSW
+            Large scale Solar PV   N0               CNSW       # Non-REZ: kept as-is
+            OCGT (small GT)        Not Applicable   NQ
+            Distributed Resources Solar  Not Applicable  SQ
+
+        returns (adds geo_id):
+            technology             REZ ID           Sub-region  geo_id
+            Wind                   N3               CNSW        N3
+            Large scale Solar PV   N0               CNSW        N0
+            OCGT (small GT)        Not Applicable   NQ          NQ
+            Distributed Resources Solar  Not Applicable  SQ     SQ
+    """
+    gens = gens.copy()
+    gens["geo_id"] = gens["REZ ID"].where(
+        gens["REZ ID"] != "Not Applicable", gens["Sub-region"]
+    )
+    return gens
+
+
+def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
+    """Adds the VRE ``resource_type`` column from the resource code in ``name``.
+
+    VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH"
+    in "Q1_WH_Far North QLD" (wind high) or "SAT" in "DREZ_SAT_Dubbo" (solar). The
+    code is extracted and mapped via ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with
+    no matching code — the underscore-free thermal and distributed-resource rows —
+    get NaN, meaning no VRE build-limit applies.
+
+    I/O Example:
+        gens:
+            name                              technology
+            Q1_WH_Far North QLD               Wind
+            Q1_WM_Far North QLD               Wind
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)
+            DREZ_SAT_Dubbo                    Large scale Solar PV
+            N0_CST_NSW                        Solar Thermal (16hrs storage)
+            CNSW SAT - Distributed Resources  Distributed Resources Solar
+            CNSW OCGT Small                   OCGT (small GT)
+
+        returns (adds resource_type):
+            name                              technology                     resource_type
+            Q1_WH_Far North QLD               Wind                           wind_high
+            Q1_WM_Far North QLD               Wind                           wind_medium
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
+            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
+            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar  # CST -> solar
+            CNSW SAT - Distributed Resources  Distributed Resources Solar   NaN  # no _ token
+            CNSW OCGT Small                   OCGT (small GT)                NaN  # no _ token
+    """
+    gens = gens.copy()
+    resource_code = gens["name"].str.extract(r"_(WH|WM|WFX|WFL|SAT|CST)_", expand=False)
+    gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE)
+    return gens
diff --git a/tests/test_iasr_table_caching/test_local_cache.py b/tests/test_iasr_table_caching/test_local_cache.py
index 28670f81..5e43df7f 100644
--- a/tests/test_iasr_table_caching/test_local_cache.py
+++ b/tests/test_iasr_table_caching/test_local_cache.py
@@ -27,6 +27,8 @@ def test_build_required_tables_new_format():
     assert "connection_costs_for_wind_and_solar" in result
     assert "connection_costs_other" in result
     assert "efficient_level_of_system_strength_cost" in result
+    # New entrant generator summary feeds the generators_new_entrant templater
+    assert "new_entrants_summary" in result
 
 
 def test_build_required_tables_old_format():
diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py
index a935853c..0c13d968 100644
--- a/tests/test_templater/test_create_ispypsa_inputs_template.py
+++ b/tests/test_templater/test_create_ispypsa_inputs_template.py
@@ -226,6 +226,16 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
         label,  2024-25
         IBR,    10
     """)
+    # Two VRE generators at Q1 (which has a connection cost forecast) so
+    # costs_connection produces rows; an OCGT and a storage row check that
+    # non-VRE pass through and storage is dropped.
+    new_entrants_summary = csv_str_to_df("""
+        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
+        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
+        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
+        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
+        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+    """)
 
     with (
         patch(
@@ -255,6 +265,7 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
                 "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar,
                 "connection_cost_forecast_other": connection_cost_forecast_other,
                 "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost,
+                "new_entrants_summary": new_entrants_summary,
             },
             # connection_capacity_non_vre is popped out of manually_extracted_tables
             # into iasr_tables by create_template; supplied so the
@@ -310,10 +321,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    # costs_connection key present with correct columns; currently
-    # generators_new_entrant is placeholder (empty) so no VRE rows are produced
-    # yet (but no errors either).
-    assert costs_connection.empty
+    assert set(costs_connection["geo_id"]) == {"Q1"}
+    assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"}
+    assert len(costs_connection) == 2
 
     # Custom-constraints tables are spliced into the output via
     # template.update(template_custom_constraints_from_plexos(...)). The
@@ -414,6 +424,13 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
         label,  2024-25
         IBR,    10
     """)
+    new_entrants_summary = csv_str_to_df("""
+        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
+        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
+        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
+        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
+        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+    """)
 
     with (
         patch(
@@ -443,6 +460,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
                 "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar,
                 "connection_cost_forecast_other": connection_cost_forecast_other,
                 "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost,
+                "new_entrants_summary": new_entrants_summary,
             },
             manually_extracted_tables={
                 "connection_capacity_non_vre": connection_capacity_non_vre,
@@ -484,7 +502,11 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    assert costs_connection.empty
+    # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1
+    # still produce two connection-cost rows at nem_regions granularity.
+    assert set(costs_connection["geo_id"]) == {"Q1"}
+    assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"}
+    assert len(costs_connection) == 2
 
     # Custom constraints from PLEXOS are sub-regional export limits with no
     # meaningful representation once sub-regions are collapsed, so the templater
@@ -554,6 +576,13 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
         label,  2024-25
         IBR,    10
     """)
+    new_entrants_summary = csv_str_to_df("""
+        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
+        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
+        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
+        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
+        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+    """)
 
     with (
         patch(
@@ -581,6 +610,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
                 "connection_costs_for_wind_and_solar": connection_costs_for_wind_and_solar,
                 "connection_cost_forecast_other": connection_cost_forecast_other,
                 "efficient_level_of_system_strength_cost": efficient_level_of_system_strength_cost,
+                "new_entrants_summary": new_entrants_summary,
             },
             manually_extracted_tables={
                 "connection_capacity_non_vre": connection_capacity_non_vre,
@@ -613,6 +643,7 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
     assert set(expansion_costs["expansion_id"]) == {"N3-NEM"}
     # 1 expansion_id x 2 years
     assert len(expansion_costs) == 2
+
     connection_costs = result["costs_connection"]
     assert set(connection_costs.columns) == {
         "geo_id",
@@ -621,7 +652,11 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    assert connection_costs.empty
+    # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1
+    # still produce two connection-cost rows at single_region granularity.
+    assert set(connection_costs["geo_id"]) == {"Q1"}
+    assert set(connection_costs["technology"]) == {"Wind", "Large scale Solar PV"}
+    assert len(connection_costs) == 2
 
     # Custom constraints from PLEXOS are sub-regional export limits with no
     # meaningful representation at single_region, so the templater skips them.
diff --git a/tests/test_templater/test_generators_new_entrant.py b/tests/test_templater/test_generators_new_entrant.py
new file mode 100644
index 00000000..9973ad33
--- /dev/null
+++ b/tests/test_templater/test_generators_new_entrant.py
@@ -0,0 +1,165 @@
+import pandas as pd
+
+from ispypsa.templater.generators_new_entrant import (
+    _IDENTITY_COLUMNS,
+    _add_resource_type,
+    _drop_storage_technologies,
+    _rename_summary_columns,
+    _set_geo_id,
+    _template_generators_new_entrant,
+)
+
+# --- _template_generators_new_entrant (orchestrator) ---
+
+
+def test_template_generators_new_entrant(csv_str_to_df):
+    # Wiring check only (per-helper behaviour is covered above): storage is dropped,
+    # the identity columns are produced, and one row per surviving generating unit
+    # is returned. Detailed content is covered by the per-helper tests.
+    new_entrants_summary = csv_str_to_df("""
+        IASR__ID__/__DLT__names,           Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
+        Q1_WH_Far__North__QLD,             Wind,                              Wind,       Wind,                Q1,              NQ
+        Q1_WM_Far__North__QLD,             Wind,                              Wind,       Wind,                Q1,              NQ
+        Q1_SAT_Far__North__QLD,            Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
+        NQ__OCGT__Small,                   OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
+        NQ__SAT__-__Distributed__Resources,Distributed__Resources__Solar,     Solar,      Solar,               Not__Applicable, NQ
+        NQ__Battery__2hrs,                 Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+    """)
+
+    result = _template_generators_new_entrant(new_entrants_summary)
+
+    # storage row dropped -> 5 of 6 rows survive; identity columns produced in order
+    assert list(result.columns) == _IDENTITY_COLUMNS
+    assert len(result) == 5
+
+
+# --- _drop_storage_technologies ---
+
+
+def test_drop_storage_technologies(csv_str_to_df):
+    # All storage variants (batteries, distributed batteries, pumped hydro) are
+    # dropped; generation rows pass through unchanged with other columns intact.
+    new_entrants_summary = csv_str_to_df("""
+        Technology__Type,                  REZ__ID
+        Wind,                              N3
+        Large__scale__Solar__PV,           N3
+        Battery__Storage__(2hrs__storage), N3
+        Distributed__Resources__Batteries, Not__Applicable
+        Pumped__Hydro__(24hrs__storage),   Not__Applicable
+        OCGT__(small__GT),                 Not__Applicable
+    """)
+
+    result = _drop_storage_technologies(new_entrants_summary)
+
+    expected = csv_str_to_df("""
+        Technology__Type,                  REZ__ID
+        Wind,                              N3
+        Large__scale__Solar__PV,           N3
+        OCGT__(small__GT),                 Not__Applicable
+    """)
+    pd.testing.assert_frame_equal(result, expected)
+
+
+def test_drop_storage_technologies_empty_input(csv_str_to_df):
+    # Empty input (all columns, no rows) returns an empty frame, no errors.
+    new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"])
+
+    result = _drop_storage_technologies(new_entrants_summary)
+
+    expected = csv_str_to_df("""
+        Technology__Type, REZ__ID
+    """)
+    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
+
+
+# --- _rename_summary_columns ---
+
+
+def test_rename_summary_columns(csv_str_to_df):
+    # The IASR ID, technology and fuel columns are renamed to their schema names;
+    # other columns (REZ ID) pass through untouched.
+    gens = csv_str_to_df("""
+        IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID
+        Q1_WH_Far__North__QLD,   Wind,             Wind,       Wind,                Q1
+        CNSW__OCGT__Small,       OCGT__(small__GT),Gas,        NSW__new__OCGT,      Not__Applicable
+    """)
+
+    result = _rename_summary_columns(gens)
+
+    expected = csv_str_to_df("""
+        name,                    technology,       fuel_type,  fuel_price_mapping,  REZ__ID
+        Q1_WH_Far__North__QLD,   Wind,             Wind,       Wind,                Q1
+        CNSW__OCGT__Small,       OCGT__(small__GT),Gas,        NSW__new__OCGT,      Not__Applicable
+    """)
+    pd.testing.assert_frame_equal(result, expected)
+
+
+# --- _set_geo_id ---
+
+
+def test_set_geo_id(csv_str_to_df):
+    # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal and
+    # distributed rows ("Not Applicable") fall back to their Sub-region.
+    gens = csv_str_to_df("""
+        technology,                   REZ__ID,         Sub-region
+        Wind,                         N3,              CNSW
+        Large__scale__Solar__PV,      N0,              CNSW
+        OCGT__(small__GT),            Not__Applicable, NQ
+        Distributed__Resources__Solar,Not__Applicable, SQ
+    """)
+
+    result = _set_geo_id(gens)
+
+    expected = csv_str_to_df("""
+        technology,                   REZ__ID,         Sub-region, geo_id
+        Wind,                         N3,              CNSW,       N3
+        Large__scale__Solar__PV,      N0,              CNSW,       N0
+        OCGT__(small__GT),            Not__Applicable, NQ,         NQ
+        Distributed__Resources__Solar,Not__Applicable, SQ,        SQ
+    """)
+    pd.testing.assert_frame_equal(result, expected)
+
+
+def test_set_geo_id_empty_input(csv_str_to_df):
+    # Empty input still returns the geo_id column (all columns, no rows).
+    gens = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"])
+
+    result = _set_geo_id(gens)
+
+    expected = csv_str_to_df("""
+        technology, REZ__ID, Sub-region, geo_id
+    """)
+    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
+
+
+# --- _add_resource_type ---
+
+
+def test_add_resource_type(csv_str_to_df):
+    # resource_type is read from the underscore-delimited code in `name`. WH/WM are
+    # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the
+    # underscore-free thermal / distributed IDs map to NaN (blank field).
+    gens = csv_str_to_df("""
+        name,                              technology
+        Q1_WH_Far__North__QLD,             Wind
+        Q1_WM_Far__North__QLD,             Wind
+        N10_WFX_Hunter__Coast,             Wind__-__offshore__(fixed)
+        DREZ_SAT_Dubbo,                    Large__scale__Solar__PV
+        N0_CST_NSW,                        Solar__Thermal__(16hrs__storage)
+        CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar
+        CNSW__OCGT__Small,                 OCGT__(small__GT)
+    """)
+
+    result = _add_resource_type(gens)
+
+    expected = csv_str_to_df("""
+        name,                              technology,                       resource_type
+        Q1_WH_Far__North__QLD,             Wind,                             wind_high
+        Q1_WM_Far__North__QLD,             Wind,                             wind_medium
+        N10_WFX_Hunter__Coast,             Wind__-__offshore__(fixed),       wind_offshore_fixed
+        DREZ_SAT_Dubbo,                    Large__scale__Solar__PV,          solar
+        N0_CST_NSW,                        Solar__Thermal__(16hrs__storage), solar
+        CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar,
+        CNSW__OCGT__Small,                 OCGT__(small__GT),
+    """)
+    pd.testing.assert_frame_equal(result, expected)

From 1834fa9b54e1475ee782a446267e2926ef11b61d Mon Sep 17 00:00:00 2001
From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:02:57 +1000
Subject: [PATCH 3/5] add new entrant storage identity templating

---
 src/ispypsa/templater/create_template.py      |  16 +-
 .../templater/generators_new_entrant.py       | 179 -------------
 src/ispypsa/templater/new_entrants.py         | 242 ++++++++++++++++++
 .../schemas/storage_new_entrant.yaml          |   8 -
 .../test_create_ispypsa_inputs_template.py    |  82 +++---
 .../test_generators_new_entrant.py            | 165 ------------
 tests/test_templater/test_new_entrants.py     | 200 +++++++++++++++
 7 files changed, 488 insertions(+), 404 deletions(-)
 delete mode 100644 src/ispypsa/templater/generators_new_entrant.py
 create mode 100644 src/ispypsa/templater/new_entrants.py
 delete mode 100644 tests/test_templater/test_generators_new_entrant.py
 create mode 100644 tests/test_templater/test_new_entrants.py

diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py
index d3e1fe71..67d6260c 100644
--- a/src/ispypsa/templater/create_template.py
+++ b/src/ispypsa/templater/create_template.py
@@ -21,9 +21,6 @@
     _template_sub_regional_flow_path_costs,
     _template_sub_regional_flow_paths,
 )
-from ispypsa.templater.generators_new_entrant import (
-    _template_generators_new_entrant,
-)
 from ispypsa.templater.geography import _template_network_geography
 from ispypsa.templater.network_expansion import (
     _extract_flow_path_costs_from_iasr,
@@ -33,6 +30,10 @@
     _filter_flow_path_augmentations_to_granularity,
     _template_network_expansion,
 )
+from ispypsa.templater.new_entrants import (
+    _template_generators_new_entrant,
+    _template_storage_new_entrant,
+)
 from ispypsa.templater.nodes import (
     _template_regions,
     _template_sub_regions,
@@ -231,14 +232,13 @@ def create_ispypsa_inputs_template(
             "connection_capacity_non_vre"
         ].copy()
 
-        # Identity columns only for now (name, technology, resource_type, geo_id,
-        # fuel_type, fuel_price_mapping); cost/property columns are added in later
-        # PRs. Feeds costs_connection but is not yet a written template output.
+        # Identity columns only for now - not yet a templater output
         generators_new_entrant = _template_generators_new_entrant(
             iasr_tables["new_entrants_summary"]
         )
-        # storage_new_entrant remains defined (empty) for wiring tests
-        storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
+        storage_new_entrant = _template_storage_new_entrant(
+            iasr_tables["new_entrants_summary"]
+        )
         template["costs_connection"] = _template_connection_costs(
             iasr_tables,
             scenario,
diff --git a/src/ispypsa/templater/generators_new_entrant.py b/src/ispypsa/templater/generators_new_entrant.py
deleted file mode 100644
index c0b73e99..00000000
--- a/src/ispypsa/templater/generators_new_entrant.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Templates the `generators_new_entrant` table: one row per new entrant generating
-# unit, with storage technologies excluded (those are templated separately into the
-# storage new-entrants table). See schemas/generators_new_entrant.yaml for the target.
-import logging
-
-import pandas as pd
-
-from ispypsa.templater.helpers import _where_any_substring_appears
-
-_IDENTITY_COLUMNS = [
-    "name",
-    "technology",
-    "resource_type",
-    "geo_id",
-    "fuel_type",
-    "fuel_price_mapping",
-]
-
-_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"]
-
-# Source (IASR new_entrants_summary) column names → schema output column names.
-# The summary's own values are treated as canonical; no cross-table canonicalisation
-# is applied here. "IASR ID / DLT names" is an existing unique identifier per row.
-_SUMMARY_COLUMN_RENAMES = {
-    "IASR ID / DLT names": "name",
-    "Technology Type": "technology",
-    "Fuel type": "fuel_type",
-    "Fuel cost mapping": "fuel_price_mapping",
-}
-
-# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
-# mapping for it if/when resource_limits templating requires one.
-_RESOURCE_QUALITY_CODE_TO_TYPE = {
-    "WH": "wind_high",
-    "WM": "wind_medium",
-    "WFX": "wind_offshore_fixed",
-    "WFL": "wind_offshore_floating",
-    "SAT": "solar",
-    "CST": "solar",
-}
-
-
-# NOTE: partial scope intentional - other columns to be added in next PRs!
-def _template_generators_new_entrant(
-    new_entrants_summary: pd.DataFrame,
-) -> pd.DataFrame:
-    """Templates the new entrant generators identity table from the IASR summary.
-
-    Drops storage, renames the carried-over summary columns to schema names, derives
-    geo_id (REZ ID or sub-region) and resource_type (from the VRE resource code in
-    the IASR ID), and returns the identity columns.
-
-    Args:
-        new_entrants_summary: IASR ``new_entrants_summary`` table.
-
-    Returns:
-        One row per generating unit with columns ``_IDENTITY_COLUMNS``.
-    """
-    logging.info("Creating a template for new entrant generators")
-    gens = _drop_storage_technologies(new_entrants_summary)
-    gens = _rename_summary_columns(gens)
-    gens = _set_geo_id(gens)
-    gens = _add_resource_type(gens)
-    return gens[_IDENTITY_COLUMNS]
-
-
-def _drop_storage_technologies(new_entrants_summary: pd.DataFrame) -> pd.DataFrame:
-    """Drops storage rows from the new entrants summary, keeping only generators.
-
-    Storage (batteries, distributed batteries, pumped hydro) is templated into the
-    storage new-entrants table, so it is removed here. Matching is case-insensitive
-    on the "Technology Type" column (see ``_STORAGE_TECHNOLOGY_STRINGS``).
-
-    I/O Example:
-        new_entrants_summary:
-            Technology Type                  REZ ID
-            Wind                             N3
-            Large scale Solar PV             N3
-            Battery Storage (2hrs storage)   N3       # storage: dropped
-            Distributed Resources Batteries  Not Applicable  # storage: dropped
-            Pumped Hydro (24hrs storage)     Not Applicable  # storage: dropped
-            OCGT (small GT)                  Not Applicable
-
-        returns:
-            Technology Type                  REZ ID
-            Wind                             N3
-            Large scale Solar PV             N3
-            OCGT (small GT)                  Not Applicable
-    """
-    is_storage = _where_any_substring_appears(
-        new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS
-    )
-    return new_entrants_summary.loc[~is_storage].reset_index(drop=True)
-
-
-def _rename_summary_columns(gens: pd.DataFrame) -> pd.DataFrame:
-    """Renames the summary's identifier, technology and fuel columns to schema names.
-
-    See ``_SUMMARY_COLUMN_RENAMES``. Other columns (e.g. "REZ ID", "Sub-region",
-    still needed to derive geo_id) pass through untouched.
-
-    I/O Example:
-        gens:
-            IASR ID / DLT names   Technology Type   Fuel type   Fuel cost mapping   REZ ID
-            Q1_WH_Far North QLD   Wind              Wind        Wind                Q1
-            CNSW OCGT Small       OCGT (small GT)   Gas         NSW new OCGT        Not Applicable
-
-        returns:
-            name                  technology        fuel_type   fuel_price_mapping  REZ ID
-            Q1_WH_Far North QLD   Wind              Wind        Wind                Q1
-            CNSW OCGT Small       OCGT (small GT)   Gas         NSW new OCGT        Not Applicable
-    """
-    return gens.rename(columns=_SUMMARY_COLUMN_RENAMES)
-
-
-def _set_geo_id(gens: pd.DataFrame) -> pd.DataFrame:
-    """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region.
-
-    REZ-located generators (VRE) carry a real "REZ ID"; thermal and distributed
-    resource rows have "REZ ID" == "Not Applicable" and sit at the sub-region, so
-    they take their "Sub-region" value instead. Non-REZ IDs (e.g. N0, V0) flow
-    through unchanged as REZ IDs.
-
-    I/O Example:
-        gens:
-            technology             REZ ID           Sub-region
-            Wind                   N3               CNSW
-            Large scale Solar PV   N0               CNSW       # Non-REZ: kept as-is
-            OCGT (small GT)        Not Applicable   NQ
-            Distributed Resources Solar  Not Applicable  SQ
-
-        returns (adds geo_id):
-            technology             REZ ID           Sub-region  geo_id
-            Wind                   N3               CNSW        N3
-            Large scale Solar PV   N0               CNSW        N0
-            OCGT (small GT)        Not Applicable   NQ          NQ
-            Distributed Resources Solar  Not Applicable  SQ     SQ
-    """
-    gens = gens.copy()
-    gens["geo_id"] = gens["REZ ID"].where(
-        gens["REZ ID"] != "Not Applicable", gens["Sub-region"]
-    )
-    return gens
-
-
-def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
-    """Adds the VRE ``resource_type`` column from the resource code in ``name``.
-
-    VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH"
-    in "Q1_WH_Far North QLD" (wind high) or "SAT" in "DREZ_SAT_Dubbo" (solar). The
-    code is extracted and mapped via ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with
-    no matching code — the underscore-free thermal and distributed-resource rows —
-    get NaN, meaning no VRE build-limit applies.
-
-    I/O Example:
-        gens:
-            name                              technology
-            Q1_WH_Far North QLD               Wind
-            Q1_WM_Far North QLD               Wind
-            N10_WFX_Hunter Coast              Wind - offshore (fixed)
-            DREZ_SAT_Dubbo                    Large scale Solar PV
-            N0_CST_NSW                        Solar Thermal (16hrs storage)
-            CNSW SAT - Distributed Resources  Distributed Resources Solar
-            CNSW OCGT Small                   OCGT (small GT)
-
-        returns (adds resource_type):
-            name                              technology                     resource_type
-            Q1_WH_Far North QLD               Wind                           wind_high
-            Q1_WM_Far North QLD               Wind                           wind_medium
-            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
-            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
-            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar  # CST -> solar
-            CNSW SAT - Distributed Resources  Distributed Resources Solar   NaN  # no _ token
-            CNSW OCGT Small                   OCGT (small GT)                NaN  # no _ token
-    """
-    gens = gens.copy()
-    resource_code = gens["name"].str.extract(r"_(WH|WM|WFX|WFL|SAT|CST)_", expand=False)
-    gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE)
-    return gens
diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py
new file mode 100644
index 00000000..2e5f6e89
--- /dev/null
+++ b/src/ispypsa/templater/new_entrants.py
@@ -0,0 +1,242 @@
+"""Templates the new entrant generator and storage identity tables.
+
+Both tables are currently built from a single IASR input, the ``new_entrants_summary``
+table. This module splits that table into its two halves and shapes each into the
+identity columns of its target schema (see schemas/generators_new_entrant.yaml and
+schemas/storage_new_entrant.yaml).
+
+There are two independent public orchestrators, one per output table, each taking
+the full summary. They share the same shape:
+    1. Filter the summary to the relevant technology group — generators or storage
+       — with _filter_to_technology_group. The storage predicate (battery, pumped
+       hydro — see _STORAGE_TECHNOLOGY_STRINGS) lives in that one function, so the
+       two orchestrators can't drift out of sync on what counts as storage.
+    2. Rename the carried-over summary columns to their schema names
+       (_SUMMARY_COLUMN_RENAMES). The summary's own values are treated as
+       canonical; no cross-table canonicalisation is applied here.
+    3. Derive geo_id: REZ-located units (most VRE, REZ-co-located batteries) take
+       their REZ ID; sub-region-located units (thermal, pumped hydro, distributed
+       resources) have "REZ ID" == "Not Applicable" and take their Sub-region.
+    4. (Generators only) Derive resource_type from the resource-quality code
+       embedded in the IASR ID — see _add_resource_type.
+    5. Select the table's identity columns. _SUMMARY_COLUMN_RENAMES is shared, so
+       a column it renames that this table does not want (fuel_price_mapping for
+       storage) simply falls away here.
+
+Reference detail:
+    - _SUMMARY_COLUMN_RENAMES: source column -> schema column, shared across both
+      tables; each table keeps only the renamed columns its identity list needs.
+    - _STORAGE_TECHNOLOGY_STRINGS: the substrings that mark a row as storage.
+    - _RESOURCE_QUALITY_CODE_TO_TYPE: VRE resource-quality code -> resource_type.
+"""
+
+import logging
+
+import pandas as pd
+
+from ispypsa.templater.helpers import _where_any_substring_appears
+
+_GENERATOR_IDENTITY_COLUMNS = [
+    "name",
+    "technology",
+    "resource_type",
+    "geo_id",
+    "fuel_type",
+    "fuel_price_mapping",
+]
+
+_STORAGE_IDENTITY_COLUMNS = [
+    "name",
+    "technology",
+    "geo_id",
+    "fuel_type",
+]
+
+_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"]
+
+# Source (IASR new_entrants_summary) column names → schema output column names.
+_SUMMARY_COLUMN_RENAMES = {
+    "IASR ID / DLT names": "name",
+    "Technology Type": "technology",
+    "Fuel type": "fuel_type",
+    "Fuel cost mapping": "fuel_price_mapping",
+}
+
+# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
+# mapping for it if/when resource_limits templating requires one.
+_RESOURCE_QUALITY_CODE_TO_TYPE = {
+    "WH": "wind_high",
+    "WM": "wind_medium",
+    "WFX": "wind_offshore_fixed",
+    "WFL": "wind_offshore_floating",
+    "SAT": "solar",
+    "CST": "solar",
+}
+
+# Extraction pattern for the resource-quality code embedded between underscores in
+# a VRE IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast".
+_RESOURCE_CODE_PATTERN = "_({})_".format(
+    "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True))
+)
+
+
+# --- public orchestrators ---
+
+
+# NOTE: partial scope intentional - other columns to be added in next PRs!
+def _template_generators_new_entrant(
+    new_entrants_summary: pd.DataFrame,
+) -> pd.DataFrame:
+    """Templates the new entrant generators identity table from the IASR summary.
+
+    Keeps only generator rows, renames the carried-over summary columns to schema
+    names, derives geo_id (REZ ID or sub-region) and resource_type (from the VRE
+    resource code in the IASR ID), and returns the identity columns.
+
+    Args:
+        new_entrants_summary: IASR ``new_entrants_summary`` table.
+
+    Returns:
+        One row per generating unit with columns ``_GENERATOR_IDENTITY_COLUMNS``.
+    """
+    logging.info("Creating a template for new entrant generators")
+    gens = _filter_to_technology_group(new_entrants_summary, "generators")
+    gens = gens.rename(columns=_SUMMARY_COLUMN_RENAMES)
+    gens = _set_geo_id(gens)
+    gens = _add_resource_type(gens)
+    return gens[_GENERATOR_IDENTITY_COLUMNS]
+
+
+# NOTE: partial scope intentional - other columns to be added in next PRs!
+def _template_storage_new_entrant(
+    new_entrants_summary: pd.DataFrame,
+) -> pd.DataFrame:
+    """Templates the new entrant storage identity table from the IASR summary.
+
+    Keeps only storage rows, renames the carried-over summary columns to schema
+    names, derives geo_id (REZ ID or sub-region), and returns the identity columns.
+
+    Args:
+        new_entrants_summary: IASR ``new_entrants_summary`` table.
+
+    Returns:
+        One row per storage unit with columns ``_STORAGE_IDENTITY_COLUMNS``.
+    """
+    logging.info("Creating a template for new entrant storage")
+    storage = _filter_to_technology_group(new_entrants_summary, "storage")
+    storage = storage.rename(columns=_SUMMARY_COLUMN_RENAMES)
+    storage = _set_geo_id(storage)
+    return storage[_STORAGE_IDENTITY_COLUMNS]
+
+
+# --- shared helpers ---
+
+
+def _filter_to_technology_group(
+    new_entrants_summary: pd.DataFrame, group: str
+) -> pd.DataFrame:
+    """Returns the summary rows for one technology group: generators or storage.
+
+    Storage rows are those whose "Technology Type" contains a
+    ``_STORAGE_TECHNOLOGY_STRINGS`` substring (battery, pumped hydro), matched
+    case-insensitively; generators are every other row. The two groups partition
+    the summary, so this single predicate is the only place the generator/storage
+    boundary is defined.
+
+    Args:
+        new_entrants_summary: the IASR ``new_entrants_summary`` table (any frame
+            with a "Technology Type" column).
+        group: "generators" or "storage".
+
+    I/O Example:
+        new_entrants_summary:
+            Technology Type                  REZ ID
+            Wind                             N3
+            Battery Storage (2hrs storage)   N3
+            Pumped Hydro (24hrs storage)     Not Applicable
+            OCGT (small GT)                  Not Applicable
+
+        group="generators" returns:
+            Technology Type                  REZ ID
+            Wind                             N3
+            OCGT (small GT)                  Not Applicable
+
+        group="storage" returns:
+            Technology Type                  REZ ID
+            Battery Storage (2hrs storage)   N3
+            Pumped Hydro (24hrs storage)     Not Applicable
+    """
+    is_storage = _where_any_substring_appears(
+        new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS
+    )
+    if group == "storage":
+        return new_entrants_summary.loc[is_storage].reset_index(drop=True)
+    if group == "generators":
+        return new_entrants_summary.loc[~is_storage].reset_index(drop=True)
+    raise ValueError(
+        "Filtering new entrants table to technology group: "
+        f"group must be 'generators' or 'storage', got {group!r}"
+    )
+
+
+def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame:
+    """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region.
+
+    I/O Example:
+        new_entrants:
+            technology                       REZ ID           Sub-region
+            Wind                             N3               CNSW
+            Large scale Solar PV             N0               CNSW       # Non-REZ: kept as-is
+            OCGT (small GT)                  Not Applicable   NQ
+            Pumped Hydro (24hrs storage)     Not Applicable   SNW
+
+        returns (adds geo_id):
+            technology                       REZ ID           Sub-region  geo_id
+            Wind                             N3               CNSW        N3
+            Large scale Solar PV             N0               CNSW        N0
+            OCGT (small GT)                  Not Applicable   NQ          NQ
+            Pumped Hydro (24hrs storage)     Not Applicable   SNW         SNW
+    """
+    new_entrants = new_entrants.copy()
+    new_entrants["geo_id"] = new_entrants["REZ ID"].where(
+        new_entrants["REZ ID"] != "Not Applicable", new_entrants["Sub-region"]
+    )
+    return new_entrants
+
+
+# --- generator-specific helpers ---
+
+
+def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
+    """Adds the VRE ``resource_type`` column from the resource code in ``name``.
+
+    VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH"
+    in "Q1_WH_Far North QLD". The code is extracted and mapped via
+    ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with no matching code — the
+    underscore-free thermal and distributed-resource rows — get NaN.
+
+    I/O Example:
+        gens:
+            name                              technology
+            Q1_WH_Far North QLD               Wind
+            Q1_WM_Far North QLD               Wind
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)
+            DREZ_SAT_Dubbo                    Large scale Solar PV
+            N0_CST_NSW                        Solar Thermal (16hrs storage)
+            CNSW SAT - Distributed Resources  Distributed Resources Solar
+            CNSW OCGT Small                   OCGT (small GT)
+
+        returns (adds resource_type):
+            name                              technology                     resource_type
+            Q1_WH_Far North QLD               Wind                           wind_high
+            Q1_WM_Far North QLD               Wind                           wind_medium
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
+            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
+            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar  # CST -> solar
+            CNSW SAT - Distributed Resources  Distributed Resources Solar    NaN  # no _ token
+            CNSW OCGT Small                   OCGT (small GT)                NaN  # no _ token
+    """
+    gens = gens.copy()
+    resource_code = gens["name"].str.extract(_RESOURCE_CODE_PATTERN, expand=False)
+    gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE)
+    return gens
diff --git a/src/ispypsa/validation/schemas/storage_new_entrant.yaml b/src/ispypsa/validation/schemas/storage_new_entrant.yaml
index 62492bb8..a6009a56 100644
--- a/src/ispypsa/validation/schemas/storage_new_entrant.yaml
+++ b/src/ispypsa/validation/schemas/storage_new_entrant.yaml
@@ -32,14 +32,6 @@ columns:
     type: string
     required: true
     description: Unique identifier for the storage unit (e.g. IASR ID or full name).
-  power_station:
-    type: string
-    required: true
-    description: >
-      Power station name grouping storage units together.
-
-      For new entrant storage units, this is always the same as the `name` field
-      (i.e, no grouping is performed). Keeping for consistency with existing storage tables.
   technology:
     type: string
     required: true
diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py
index 0c13d968..28d5400c 100644
--- a/tests/test_templater/test_create_ispypsa_inputs_template.py
+++ b/tests/test_templater/test_create_ispypsa_inputs_template.py
@@ -215,26 +215,23 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
         Q1,       400
     """)
     connection_cost_forecast_other = csv_str_to_df("""
-        Generator Type,            Region,  Scenario,      2024-25,    2025-26
-        Battery Storage (4h),     NSW,     Step Change,  20000000,   22000000
+        Generator Type,     Region,  Scenario,     2024-25,   2025-26
+        OCGT (small GT),    NSW,     Step Change,  20000000,  22000000
     """)
     connection_capacity_non_vre = csv_str_to_df("""
-        Region,  Generator Type,         Connection capacity (MVA)
-        NSW,     Battery Storage (4h),  400
+        Region,  Generator Type,    Connection capacity (MVA)
+        NSW,     OCGT (small GT),   400
     """)
     efficient_level_of_system_strength_cost = csv_str_to_df("""
         label,  2024-25
         IBR,    10
     """)
-    # Two VRE generators at Q1 (which has a connection cost forecast) so
-    # costs_connection produces rows; an OCGT and a storage row check that
-    # non-VRE pass through and storage is dropped.
     new_entrants_summary = csv_str_to_df("""
-        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
-        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
-        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
-        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
-        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+        IASR ID / DLT names,        Technology Type,        Fuel type,  Fuel cost mapping,  REZ ID,         Sub-region
+        Q1_WH_Far North QLD,        Wind,                   Wind,       Wind,               Q1,             NQ
+        Q1_SAT_Far North QLD,       Large scale Solar PV,   Solar,      Solar,              Q1,             NQ
+        CNSW OCGT Small,            OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, CNSW
+        SNW OCGT Small,             OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, SNW
     """)
 
     with (
@@ -312,7 +309,6 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
     # 3 expansion_ids x 2 years
     assert len(expansion_costs) == 6
 
-    assert "costs_connection" in result
     costs_connection = result["costs_connection"]
     assert set(costs_connection.columns) == {
         "geo_id",
@@ -321,9 +317,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    assert set(costs_connection["geo_id"]) == {"Q1"}
-    assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"}
-    assert len(costs_connection) == 2
+    # [(2 VRE) x (1 REZ) + (1 non-VRE) x (2 subregions)] x 2 years
+    assert set(costs_connection["geo_id"]) == {"Q1", "CNSW", "SNW"}
+    assert len(costs_connection) == 8
 
     # Custom-constraints tables are spliced into the output via
     # template.update(template_custom_constraints_from_plexos(...)). The
@@ -413,23 +409,23 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
         Q1,       400
     """)
     connection_cost_forecast_other = csv_str_to_df("""
-        Generator Type,            Region,  Scenario,      2024-25,    2025-26
-        Battery Storage (4h),     NSW,     Step Change,  20000000,   22000000
+        Generator Type,     Region,  Scenario,     2024-25,   2025-26
+        OCGT (small GT),    NSW,     Step Change,  20000000,  22000000
     """)
     connection_capacity_non_vre = csv_str_to_df("""
-        Region,  Generator Type,         Connection capacity (MVA)
-        NSW,     Battery Storage (4h),  400
+        Region,  Generator Type,    Connection capacity (MVA)
+        NSW,     OCGT (small GT),   400
     """)
     efficient_level_of_system_strength_cost = csv_str_to_df("""
         label,  2024-25
         IBR,    10
     """)
     new_entrants_summary = csv_str_to_df("""
-        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
-        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
-        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
-        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
-        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+        IASR ID / DLT names,        Technology Type,        Fuel type,  Fuel cost mapping,  REZ ID,         Sub-region
+        Q1_WH_Far North QLD,        Wind,                   Wind,       Wind,               Q1,             NQ
+        Q1_SAT_Far North QLD,       Large scale Solar PV,   Solar,      Solar,              Q1,             NQ
+        CNSW OCGT Small,            OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, CNSW
+        SNW OCGT Small,             OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, SNW
     """)
 
     with (
@@ -494,6 +490,7 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
     assert set(expansion_costs["expansion_id"]) == {"NSW-QLD", "N3-NSW"}
     # 2 expansion_ids x 2 years
     assert len(expansion_costs) == 4
+
     costs_connection = result["costs_connection"]
     assert set(costs_connection.columns) == {
         "geo_id",
@@ -502,11 +499,10 @@ def test_create_ispypsa_inputs_template_new_format_nem_regions(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1
-    # still produce two connection-cost rows at nem_regions granularity.
-    assert set(costs_connection["geo_id"]) == {"Q1"}
-    assert set(costs_connection["technology"]) == {"Wind", "Large scale Solar PV"}
-    assert len(costs_connection) == 2
+    # REZ geo_ids (Q1) are granularity-invariant, subregions in same region collapse
+    # [(2 VRE x 1 REZ) + (1 non-VRE x 1 region)] x 2 years
+    assert set(costs_connection["geo_id"]) == {"Q1", "NSW"}
+    assert len(costs_connection) == 6
 
     # Custom constraints from PLEXOS are sub-regional export limits with no
     # meaningful representation once sub-regions are collapsed, so the templater
@@ -565,23 +561,23 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
         Q1,       400
     """)
     connection_cost_forecast_other = csv_str_to_df("""
-        Generator Type,            Region,  Scenario,      2024-25,    2025-26
-        Battery Storage (4h),     NSW,     Step Change,  20000000,   22000000
+        Generator Type,     Region,  Scenario,     2024-25,   2025-26
+        OCGT (small GT),    NSW,     Step Change,  20000000,  22000000
     """)
     connection_capacity_non_vre = csv_str_to_df("""
-        Region,  Generator Type,         Connection capacity (MVA)
-        NSW,     Battery Storage (4h),  400
+        Region,  Generator Type,    Connection capacity (MVA)
+        NSW,     OCGT (small GT),   400
     """)
     efficient_level_of_system_strength_cost = csv_str_to_df("""
         label,  2024-25
         IBR,    10
     """)
     new_entrants_summary = csv_str_to_df("""
-        IASR__ID__/__DLT__names, Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
-        Q1_WH_Far__North__QLD,   Wind,                              Wind,       Wind,                Q1,              NQ
-        Q1_SAT_Far__North__QLD,  Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
-        NQ__OCGT__Small,         OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
-        NQ__Battery__2hrs,       Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
+        IASR ID / DLT names,        Technology Type,        Fuel type,  Fuel cost mapping,  REZ ID,         Sub-region
+        Q1_WH_Far North QLD,        Wind,                   Wind,       Wind,               Q1,             NQ
+        Q1_SAT_Far North QLD,       Large scale Solar PV,   Solar,      Solar,              Q1,             NQ
+        CNSW OCGT Small,            OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, CNSW
+        SNW OCGT Small,             OCGT (small GT),        Gas,        NSW new OCGT,       Not Applicable, SNW
     """)
 
     with (
@@ -652,11 +648,9 @@ def test_create_ispypsa_inputs_template_new_format_single_region(csv_str_to_df):
         "connection_cost",
         "system_strength_cost",
     }
-    # REZ geo_ids (Q1) are granularity-invariant, so the two VRE generators at Q1
-    # still produce two connection-cost rows at single_region granularity.
-    assert set(connection_costs["geo_id"]) == {"Q1"}
-    assert set(connection_costs["technology"]) == {"Wind", "Large scale Solar PV"}
-    assert len(connection_costs) == 2
+    # [(2 VRE x 1 REZ) + (1 non-VRE x NEM)] x 2 years
+    assert set(connection_costs["geo_id"]) == {"Q1", "NEM"}
+    assert len(connection_costs) == 6
 
     # Custom constraints from PLEXOS are sub-regional export limits with no
     # meaningful representation at single_region, so the templater skips them.
diff --git a/tests/test_templater/test_generators_new_entrant.py b/tests/test_templater/test_generators_new_entrant.py
deleted file mode 100644
index 9973ad33..00000000
--- a/tests/test_templater/test_generators_new_entrant.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import pandas as pd
-
-from ispypsa.templater.generators_new_entrant import (
-    _IDENTITY_COLUMNS,
-    _add_resource_type,
-    _drop_storage_technologies,
-    _rename_summary_columns,
-    _set_geo_id,
-    _template_generators_new_entrant,
-)
-
-# --- _template_generators_new_entrant (orchestrator) ---
-
-
-def test_template_generators_new_entrant(csv_str_to_df):
-    # Wiring check only (per-helper behaviour is covered above): storage is dropped,
-    # the identity columns are produced, and one row per surviving generating unit
-    # is returned. Detailed content is covered by the per-helper tests.
-    new_entrants_summary = csv_str_to_df("""
-        IASR__ID__/__DLT__names,           Technology__Type,                  Fuel__type, Fuel__cost__mapping, REZ__ID,         Sub-region
-        Q1_WH_Far__North__QLD,             Wind,                              Wind,       Wind,                Q1,              NQ
-        Q1_WM_Far__North__QLD,             Wind,                              Wind,       Wind,                Q1,              NQ
-        Q1_SAT_Far__North__QLD,            Large__scale__Solar__PV,           Solar,      Solar,               Q1,              NQ
-        NQ__OCGT__Small,                   OCGT__(small__GT),                 Gas,        QLD__new__OCGT,      Not__Applicable, NQ
-        NQ__SAT__-__Distributed__Resources,Distributed__Resources__Solar,     Solar,      Solar,               Not__Applicable, NQ
-        NQ__Battery__2hrs,                 Battery__Storage__(2hrs__storage), Battery,    Battery,             Not__Applicable, NQ
-    """)
-
-    result = _template_generators_new_entrant(new_entrants_summary)
-
-    # storage row dropped -> 5 of 6 rows survive; identity columns produced in order
-    assert list(result.columns) == _IDENTITY_COLUMNS
-    assert len(result) == 5
-
-
-# --- _drop_storage_technologies ---
-
-
-def test_drop_storage_technologies(csv_str_to_df):
-    # All storage variants (batteries, distributed batteries, pumped hydro) are
-    # dropped; generation rows pass through unchanged with other columns intact.
-    new_entrants_summary = csv_str_to_df("""
-        Technology__Type,                  REZ__ID
-        Wind,                              N3
-        Large__scale__Solar__PV,           N3
-        Battery__Storage__(2hrs__storage), N3
-        Distributed__Resources__Batteries, Not__Applicable
-        Pumped__Hydro__(24hrs__storage),   Not__Applicable
-        OCGT__(small__GT),                 Not__Applicable
-    """)
-
-    result = _drop_storage_technologies(new_entrants_summary)
-
-    expected = csv_str_to_df("""
-        Technology__Type,                  REZ__ID
-        Wind,                              N3
-        Large__scale__Solar__PV,           N3
-        OCGT__(small__GT),                 Not__Applicable
-    """)
-    pd.testing.assert_frame_equal(result, expected)
-
-
-def test_drop_storage_technologies_empty_input(csv_str_to_df):
-    # Empty input (all columns, no rows) returns an empty frame, no errors.
-    new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"])
-
-    result = _drop_storage_technologies(new_entrants_summary)
-
-    expected = csv_str_to_df("""
-        Technology__Type, REZ__ID
-    """)
-    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
-
-
-# --- _rename_summary_columns ---
-
-
-def test_rename_summary_columns(csv_str_to_df):
-    # The IASR ID, technology and fuel columns are renamed to their schema names;
-    # other columns (REZ ID) pass through untouched.
-    gens = csv_str_to_df("""
-        IASR__ID__/__DLT__names, Technology__Type, Fuel__type, Fuel__cost__mapping, REZ__ID
-        Q1_WH_Far__North__QLD,   Wind,             Wind,       Wind,                Q1
-        CNSW__OCGT__Small,       OCGT__(small__GT),Gas,        NSW__new__OCGT,      Not__Applicable
-    """)
-
-    result = _rename_summary_columns(gens)
-
-    expected = csv_str_to_df("""
-        name,                    technology,       fuel_type,  fuel_price_mapping,  REZ__ID
-        Q1_WH_Far__North__QLD,   Wind,             Wind,       Wind,                Q1
-        CNSW__OCGT__Small,       OCGT__(small__GT),Gas,        NSW__new__OCGT,      Not__Applicable
-    """)
-    pd.testing.assert_frame_equal(result, expected)
-
-
-# --- _set_geo_id ---
-
-
-def test_set_geo_id(csv_str_to_df):
-    # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal and
-    # distributed rows ("Not Applicable") fall back to their Sub-region.
-    gens = csv_str_to_df("""
-        technology,                   REZ__ID,         Sub-region
-        Wind,                         N3,              CNSW
-        Large__scale__Solar__PV,      N0,              CNSW
-        OCGT__(small__GT),            Not__Applicable, NQ
-        Distributed__Resources__Solar,Not__Applicable, SQ
-    """)
-
-    result = _set_geo_id(gens)
-
-    expected = csv_str_to_df("""
-        technology,                   REZ__ID,         Sub-region, geo_id
-        Wind,                         N3,              CNSW,       N3
-        Large__scale__Solar__PV,      N0,              CNSW,       N0
-        OCGT__(small__GT),            Not__Applicable, NQ,         NQ
-        Distributed__Resources__Solar,Not__Applicable, SQ,        SQ
-    """)
-    pd.testing.assert_frame_equal(result, expected)
-
-
-def test_set_geo_id_empty_input(csv_str_to_df):
-    # Empty input still returns the geo_id column (all columns, no rows).
-    gens = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"])
-
-    result = _set_geo_id(gens)
-
-    expected = csv_str_to_df("""
-        technology, REZ__ID, Sub-region, geo_id
-    """)
-    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
-
-
-# --- _add_resource_type ---
-
-
-def test_add_resource_type(csv_str_to_df):
-    # resource_type is read from the underscore-delimited code in `name`. WH/WM are
-    # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the
-    # underscore-free thermal / distributed IDs map to NaN (blank field).
-    gens = csv_str_to_df("""
-        name,                              technology
-        Q1_WH_Far__North__QLD,             Wind
-        Q1_WM_Far__North__QLD,             Wind
-        N10_WFX_Hunter__Coast,             Wind__-__offshore__(fixed)
-        DREZ_SAT_Dubbo,                    Large__scale__Solar__PV
-        N0_CST_NSW,                        Solar__Thermal__(16hrs__storage)
-        CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar
-        CNSW__OCGT__Small,                 OCGT__(small__GT)
-    """)
-
-    result = _add_resource_type(gens)
-
-    expected = csv_str_to_df("""
-        name,                              technology,                       resource_type
-        Q1_WH_Far__North__QLD,             Wind,                             wind_high
-        Q1_WM_Far__North__QLD,             Wind,                             wind_medium
-        N10_WFX_Hunter__Coast,             Wind__-__offshore__(fixed),       wind_offshore_fixed
-        DREZ_SAT_Dubbo,                    Large__scale__Solar__PV,          solar
-        N0_CST_NSW,                        Solar__Thermal__(16hrs__storage), solar
-        CNSW__SAT__-__Distributed__Resources, Distributed__Resources__Solar,
-        CNSW__OCGT__Small,                 OCGT__(small__GT),
-    """)
-    pd.testing.assert_frame_equal(result, expected)
diff --git a/tests/test_templater/test_new_entrants.py b/tests/test_templater/test_new_entrants.py
new file mode 100644
index 00000000..f211f4f2
--- /dev/null
+++ b/tests/test_templater/test_new_entrants.py
@@ -0,0 +1,200 @@
+import pandas as pd
+import pytest
+
+from ispypsa.templater.new_entrants import (
+    _GENERATOR_IDENTITY_COLUMNS,
+    _STORAGE_IDENTITY_COLUMNS,
+    _add_resource_type,
+    _filter_to_technology_group,
+    _set_geo_id,
+    _template_generators_new_entrant,
+    _template_storage_new_entrant,
+)
+
+# --- orchestrators ---
+
+
+def test_template_generators_new_entrant(csv_str_to_df):
+    # Wiring check only (per-helper behaviour is covered below): storage is dropped,
+    # the identity columns are produced, and one row per surviving generating unit
+    # is returned. Detailed content is covered by the per-helper tests.
+    new_entrants_summary = csv_str_to_df("""
+        IASR ID / DLT names,            Technology Type,                Fuel type,  Fuel cost mapping,  REZ ID,         Sub-region
+        Q1_WH_Far North QLD,            Wind,                           Wind,       Wind,               Q1,             NQ
+        Q1_WM_Far North QLD,            Wind,                           Wind,       Wind,               Q1,             NQ
+        Q1_SAT_Far North QLD,           Large scale Solar PV,           Solar,      Solar,              Q1,             NQ
+        NQ OCGT Small,                  OCGT (small GT),                Gas,        QLD new OCGT,       Not Applicable, NQ
+        NQ SAT - Distributed Resources, Distributed Resources Solar,    Solar,      Solar,              Not Applicable, NQ
+        NQ Battery 2hrs,                Battery Storage (2hrs storage), Battery,    Battery,            Not Applicable, NQ
+    """)
+
+    result = _template_generators_new_entrant(new_entrants_summary)
+
+    # storage row dropped -> 5 of 6 rows survive; identity columns produced in order
+    assert list(result.columns) == _GENERATOR_IDENTITY_COLUMNS
+    assert len(result) == 5
+
+
+def test_template_storage_new_entrant(csv_str_to_df):
+    # Wiring check only (per-helper behaviour is covered below): generators are
+    # dropped, the identity columns are produced, and one row per surviving storage
+    # unit is returned. Detailed content is covered by the per-helper tests.
+    new_entrants_summary = csv_str_to_df("""
+        IASR ID / DLT names,            Technology Type,                 Fuel type,  Fuel cost mapping,  REZ ID,         Sub-region
+        Q1_WH_Far North QLD,            Wind,                            Wind,       Wind,               Q1,             NQ
+        NQ OCGT Small,                  OCGT (small GT),                 Gas,        QLD new OCGT,       Not Applicable, NQ
+        NQ Battery 2hrs,                Battery Storage (2hrs storage),  Battery,    Battery,            N3,             NQ
+        NQ Battery - Distributed,       Distributed Resources Batteries, Battery,    Battery,            Not Applicable, NQ
+        Snowy PH 24hr,                  Pumped Hydro (24hrs storage),    Water,      Water,              Not Applicable, NQ
+    """)
+
+    result = _template_storage_new_entrant(new_entrants_summary)
+
+    # generator rows dropped -> 3 of 5 rows survive; identity columns produced in order
+    assert list(result.columns) == _STORAGE_IDENTITY_COLUMNS
+    assert len(result) == 3
+
+
+# --- _filter_to_technology_group ---
+
+
+def test_filter_to_technology_group(csv_str_to_df):
+    # test core split/filter function returns both groups correctly
+    new_entrants_summary = csv_str_to_df("""
+        Technology Type,                    REZ ID
+        Wind,                               N3
+        Large scale Solar PV,               N3
+        Battery Storage (2hrs storage),     N3
+        Distributed Resources Batteries,    Not Applicable
+        Pumped Hydro (24hrs storage),       Not Applicable
+        OCGT (small GT),                    Not Applicable
+    """)
+
+    # All storage variants (batteries, distributed batteries, pumped hydro) are
+    # dropped; generation rows pass through unchanged with other columns intact.
+    generators = _filter_to_technology_group(new_entrants_summary, "generators")
+
+    expected_gens = csv_str_to_df("""
+        Technology Type,        REZ ID
+        Wind,                   N3
+        Large scale Solar PV,   N3
+        OCGT (small GT),        Not Applicable
+    """)
+    pd.testing.assert_frame_equal(generators, expected_gens)
+
+    # Only storage variants (batteries, distributed batteries, pumped hydro) are
+    # kept - unchanged, with other columns intact.
+    storage = _filter_to_technology_group(new_entrants_summary, "storage")
+
+    expected_storage = csv_str_to_df("""
+        Technology Type,                    REZ ID
+        Battery Storage (2hrs storage),     N3
+        Distributed Resources Batteries,    Not Applicable
+        Pumped Hydro (24hrs storage),       Not Applicable
+    """)
+    pd.testing.assert_frame_equal(storage, expected_storage)
+
+
+def test_filter_to_technology_group_raises_unknown_group(csv_str_to_df, caplog):
+    # Raises on non-permitted 'group' arg (not "generators" or "storage")
+    new_entrants_summary = csv_str_to_df("""
+        Technology Type,                    REZ ID
+        Wind,                               N3
+        Pumped Hydro (24hrs storage),       Not Applicable
+    """)
+
+    with pytest.raises(ValueError, match="group must be 'generators' or 'storage'"):
+        _filter_to_technology_group(new_entrants_summary, "computers")
+
+
+def test_filter_to_technology_group_empty_input(csv_str_to_df):
+    # Empty input (all columns, no rows) returns an empty frame, no errors.
+    new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"])
+
+    result = _filter_to_technology_group(new_entrants_summary, "storage")
+
+    expected = csv_str_to_df("""
+        Technology Type, REZ ID
+    """)
+    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
+
+
+# --- _set_geo_id ---
+
+
+def test_set_geo_id(csv_str_to_df):
+    # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal, pumped hydro
+    # and distributed rows ("Not Applicable") fall back to their Sub-region.
+    new_entrants = csv_str_to_df("""
+        technology,                     REZ ID,         Sub-region
+        Wind,                           N3,             CNSW
+        Large scale Solar PV,           N0,             CNSW
+        OCGT (small GT),                Not Applicable, NQ
+        Pumped Hydro (24hrs storage),   Not Applicable, SNW
+    """)
+
+    result = _set_geo_id(new_entrants)
+
+    expected = csv_str_to_df("""
+        technology,                     REZ ID,         Sub-region, geo_id
+        Wind,                           N3,             CNSW,       N3
+        Large scale Solar PV,           N0,             CNSW,       N0
+        OCGT (small GT),                Not Applicable, NQ,         NQ
+        Pumped Hydro (24hrs storage),   Not Applicable, SNW,        SNW
+    """)
+    pd.testing.assert_frame_equal(result, expected)
+
+
+def test_set_geo_id_empty_input(csv_str_to_df):
+    # Empty input still returns the geo_id column (all columns, no rows).
+    new_entrants = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"])
+
+    result = _set_geo_id(new_entrants)
+
+    expected = csv_str_to_df("""
+        technology, REZ ID, Sub-region, geo_id
+    """)
+    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
+
+
+# --- _add_resource_type (generator-specific) ---
+
+
+def test_add_resource_type(csv_str_to_df):
+    # resource_type is read from the underscore-delimited code in `name`. WH/WM are
+    # already separate rows (no explosion). CST (solar thermal) maps to "solar"; the
+    # underscore-free thermal / distributed IDs map to NaN (blank field).
+    gens = csv_str_to_df("""
+        name,                               technology
+        Q1_WH_Far North QLD,                Wind
+        Q1_WM_Far North QLD,                Wind
+        N10_WFX_Hunter Coast,               Wind - offshore (fixed)
+        DREZ_SAT_Dubbo,                     Large scale Solar PV
+        N0_CST_NSW,                         Solar Thermal (16hrs storage)
+        CNSW SAT - Distributed Resources,   Distributed Resources Solar
+        CNSW OCGT Small,                    OCGT (small GT)
+    """)
+
+    result = _add_resource_type(gens)
+
+    expected = csv_str_to_df("""
+        name,                               technology,                     resource_type
+        Q1_WH_Far North QLD,                Wind,                           wind_high
+        Q1_WM_Far North QLD,                Wind,                           wind_medium
+        N10_WFX_Hunter Coast,               Wind - offshore (fixed),        wind_offshore_fixed
+        DREZ_SAT_Dubbo,                     Large scale Solar PV,           solar
+        N0_CST_NSW,                         Solar Thermal (16hrs storage),  solar
+        CNSW SAT - Distributed Resources,   Distributed Resources Solar,
+        CNSW OCGT Small,                    OCGT (small GT),
+    """)
+    pd.testing.assert_frame_equal(result, expected)
+
+
+def test_add_resource_type_empty_input():
+    # test empty input still returns the input df columns + resource_type column
+    empty_input = pd.DataFrame(columns=["name", "technology"])
+
+    result = _add_resource_type(empty_input)
+
+    expected = pd.DataFrame(columns=["name", "technology", "resource_type"])
+    pd.testing.assert_frame_equal(result, expected)

From e7a097ebfa812a3d5e01b3656e1024aaafd10c63 Mon Sep 17 00:00:00 2001
From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com>
Date: Fri, 19 Jun 2026 14:38:45 +1000
Subject: [PATCH 4/5] clean up docstrings and add extra i/o examples

---
 src/ispypsa/templater/new_entrants.py | 53 ++++++++++++---------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py
index 2e5f6e89..188217ea 100644
--- a/src/ispypsa/templater/new_entrants.py
+++ b/src/ispypsa/templater/new_entrants.py
@@ -1,33 +1,17 @@
 """Templates the new entrant generator and storage identity tables.
 
 Both tables are currently built from a single IASR input, the ``new_entrants_summary``
-table. This module splits that table into its two halves and shapes each into the
+table. This module splits that table into its two subsets and shapes each into the
 identity columns of its target schema (see schemas/generators_new_entrant.yaml and
 schemas/storage_new_entrant.yaml).
 
 There are two independent public orchestrators, one per output table, each taking
 the full summary. They share the same shape:
-    1. Filter the summary to the relevant technology group — generators or storage
-       — with _filter_to_technology_group. The storage predicate (battery, pumped
-       hydro — see _STORAGE_TECHNOLOGY_STRINGS) lives in that one function, so the
-       two orchestrators can't drift out of sync on what counts as storage.
+    1. Filter the summary to the relevant technology group
     2. Rename the carried-over summary columns to their schema names
-       (_SUMMARY_COLUMN_RENAMES). The summary's own values are treated as
-       canonical; no cross-table canonicalisation is applied here.
-    3. Derive geo_id: REZ-located units (most VRE, REZ-co-located batteries) take
-       their REZ ID; sub-region-located units (thermal, pumped hydro, distributed
-       resources) have "REZ ID" == "Not Applicable" and take their Sub-region.
-    4. (Generators only) Derive resource_type from the resource-quality code
-       embedded in the IASR ID — see _add_resource_type.
-    5. Select the table's identity columns. _SUMMARY_COLUMN_RENAMES is shared, so
-       a column it renames that this table does not want (fuel_price_mapping for
-       storage) simply falls away here.
-
-Reference detail:
-    - _SUMMARY_COLUMN_RENAMES: source column -> schema column, shared across both
-      tables; each table keeps only the renamed columns its identity list needs.
-    - _STORAGE_TECHNOLOGY_STRINGS: the substrings that mark a row as storage.
-    - _RESOURCE_QUALITY_CODE_TO_TYPE: VRE resource-quality code -> resource_type.
+    3. Derive geo_id
+    4. (Generators only) Derive resource_type
+    5. Select the table's group-specific identity columns.
 """
 
 import logging
@@ -93,11 +77,18 @@ def _template_generators_new_entrant(
     names, derives geo_id (REZ ID or sub-region) and resource_type (from the VRE
     resource code in the IASR ID), and returns the identity columns.
 
-    Args:
-        new_entrants_summary: IASR ``new_entrants_summary`` table.
+    I/O Example:
+        new_entrants_summary (abbr.):
+            IASR ID     Power Station   Technology Type REZ ID          Sub-region  Fuel type   Fuel cost mapping
+            N3_WH_rez   N3_WH_rez       Wind            N3              NNSW        Wind        Wind
+            N3 Battery  N3 Battery      Battery (2hrs)  N3              NNSW        Battery     Battery
+            SQ CCGT     SQ CCGT         CCGT            Not Applicable  SQ          Gas         QLD new CCGT
 
     Returns:
-        One row per generating unit with columns ``_GENERATOR_IDENTITY_COLUMNS``.
+        name        technology  resource_type   geo_id  fuel_type   fuel_price_mapping
+        N3_WH_rez   Wind        wind_high       N3      Wind        Wind
+        SQ CCGT     CCGT                        SQ      Gas         QLD new CCGT
+
     """
     logging.info("Creating a template for new entrant generators")
     gens = _filter_to_technology_group(new_entrants_summary, "generators")
@@ -116,11 +107,16 @@ def _template_storage_new_entrant(
     Keeps only storage rows, renames the carried-over summary columns to schema
     names, derives geo_id (REZ ID or sub-region), and returns the identity columns.
 
-    Args:
-        new_entrants_summary: IASR ``new_entrants_summary`` table.
+    I/O Example:
+        new_entrants_summary (abbr.):
+            IASR ID     Power Station   Technology Type REZ ID          Sub-region  Fuel type   Fuel cost mapping
+            N3_WH_rez   N3_WH_rez       Wind            N3              NNSW        Wind        Wind
+            N3 Battery  N3 Battery      Battery (2hrs)  N3              NNSW        Battery     Battery
+            SQ CCGT     SQ CCGT         CCGT            Not Applicable  SQ          Gas         QLD new CCGT
 
     Returns:
-        One row per storage unit with columns ``_STORAGE_IDENTITY_COLUMNS``.
+        name        technology      geo_id  fuel_type
+        N3 Battery  Battery (2hrs)  N3      Battery
     """
     logging.info("Creating a template for new entrant storage")
     storage = _filter_to_technology_group(new_entrants_summary, "storage")
@@ -144,8 +140,7 @@ def _filter_to_technology_group(
     boundary is defined.
 
     Args:
-        new_entrants_summary: the IASR ``new_entrants_summary`` table (any frame
-            with a "Technology Type" column).
+        new_entrants_summary: the IASR ``new_entrants_summary`` table
         group: "generators" or "storage".
 
     I/O Example:

From aba1ad028e74fdae27c84b5966e3f40f92fa5482 Mon Sep 17 00:00:00 2001
From: EllieKallmier <61219730+EllieKallmier@users.noreply.github.com>
Date: Wed, 24 Jun 2026 17:11:26 +1000
Subject: [PATCH 5/5] pull out common helpers to set geo_id and check battery
 and storage rows

---
 .../custom_constraints_from_plexos.py         | 25 +-----
 src/ispypsa/templater/helpers.py              | 45 ++++++++++
 src/ispypsa/templater/new_entrants.py         | 84 +++----------------
 .../test_custom_constraints_from_plexos.py    | 38 ---------
 tests/test_templater/test_helpers.py          | 79 +++++++++++++++++
 tests/test_templater/test_new_entrants.py     | 75 +----------------
 6 files changed, 140 insertions(+), 206 deletions(-)

diff --git a/src/ispypsa/templater/custom_constraints_from_plexos.py b/src/ispypsa/templater/custom_constraints_from_plexos.py
index c2a4672e..68c35fbe 100644
--- a/src/ispypsa/templater/custom_constraints_from_plexos.py
+++ b/src/ispypsa/templater/custom_constraints_from_plexos.py
@@ -176,6 +176,7 @@
 
 import pandas as pd
 
+from .helpers import _is_battery_row, _pick_location
 from .mappings import _CANONICAL_TIMESLICES
 
 # PLEXOS REZ-id prefixes that IASR renamed to DREZ. Applied to the first
@@ -924,30 +925,6 @@ def _battery_to_location(new_entrants: pd.DataFrame) -> dict[str, str]:
     return dict(zip(batteries["IASR ID / DLT names"], locations))
 
 
-def _is_battery_row(new_entrants: pd.DataFrame) -> pd.Series:
-    """Boolean mask selecting battery rows in new_entrants_summary.
-
-    Matches any Technology Type that contains the literal substring
-    "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
-    and "Distributed Resources Batteries" (plural). Other storage
-    technologies (pumped hydro, solar thermal) intentionally do not match.
-    """
-    return new_entrants["Technology Type"].str.contains("Batter", na=False)
-
-
-def _pick_location(row: pd.Series) -> str:
-    """Return REZ ID when populated, otherwise Sub-region.
-
-    I/O Example:
-        {"REZ ID": "Q8",             "Sub-region": "SQ"}  -> "Q8"
-        {"REZ ID": "Not Applicable", "Sub-region": "SQ"}  -> "SQ"
-    """
-    rez_id = row["REZ ID"]
-    if pd.notna(rez_id) and rez_id != "Not Applicable":
-        return rez_id
-    return row["Sub-region"]
-
-
 def _triggered_locations_per_constraint(
     lhs: pd.DataFrame, unit_to_location: dict[str, str]
 ) -> pd.DataFrame:
diff --git a/src/ispypsa/templater/helpers.py b/src/ispypsa/templater/helpers.py
index eb229e63..da9456b5 100644
--- a/src/ispypsa/templater/helpers.py
+++ b/src/ispypsa/templater/helpers.py
@@ -384,6 +384,51 @@ def _strip_all_text_after_numeric_value(
     return series
 
 
+def _pick_location(row: pd.Series) -> str:
+    """Return a technology's REZ ID when populated, otherwise Sub-region.
+
+    I/O Example:
+        {"REZ ID": "Q8",             "Sub-region": "SQ"}  -> "Q8"
+        {"REZ ID": "Not Applicable", "Sub-region": "SQ"}  -> "SQ"
+    """
+    rez_id = row["REZ ID"]
+    if pd.notna(rez_id) and rez_id != "Not Applicable":
+        return rez_id
+    return row["Sub-region"]
+
+
+def _is_battery_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Boolean mask selecting battery technology rows in ``df``.
+
+    Matches any ``col_to_check`` row that contains the literal substring
+    "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
+    and "Distributed Resources Batteries" (plural). Other storage
+    technologies (pumped hydro, solar thermal) intentionally do not match.
+    """
+    return df[col_to_check].str.contains("Batter", na=False)
+
+
+def _is_pumped_hydro_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Boolean mask selecting pumped hydro technology rows in ``df``.
+
+    Matches any ``col_to_check`` row that contains the literal substring
+    "Pumped Hydro" -- covering all durations. Other storage technologies
+    (batteries, solar thermal) intentionally do not match.
+    """
+    return df[col_to_check].str.contains("Pumped Hydro", na=False)
+
+
+def _is_storage_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Wrapper that returns union of ``_is_battery_row`` and ``_is_pumped_hydro_row``."""
+    return _is_battery_row(df, col_to_check) | _is_pumped_hydro_row(df, col_to_check)
+
+
 def _standardise_storage_capitalisation(series: pd.Series) -> pd.Series:
     """
     Standardises capitalisation of "storage" in a pandas Series.
diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py
index 188217ea..5f06d2ef 100644
--- a/src/ispypsa/templater/new_entrants.py
+++ b/src/ispypsa/templater/new_entrants.py
@@ -18,7 +18,10 @@
 
 import pandas as pd
 
-from ispypsa.templater.helpers import _where_any_substring_appears
+from ispypsa.templater.helpers import (
+    _is_storage_row,
+    _pick_location,
+)
 
 _GENERATOR_IDENTITY_COLUMNS = [
     "name",
@@ -36,8 +39,6 @@
     "fuel_type",
 ]
 
-_STORAGE_TECHNOLOGY_STRINGS = ["battery", "batteries", "pumped hydro"]
-
 # Source (IASR new_entrants_summary) column names → schema output column names.
 _SUMMARY_COLUMN_RENAMES = {
     "IASR ID / DLT names": "name",
@@ -57,8 +58,10 @@
     "CST": "solar",
 }
 
-# Extraction pattern for the resource-quality code embedded between underscores in
-# a VRE IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast".
+# Regex extracting the resource-quality code embedded between underscores in a VRE
+# IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". Derived from the code map, it
+# expands to "_(WFX|WFL|SAT|...)_" — one capture group over the known codes
+# sorted longest-first so a short code can't shadow a longer one it prefixes.
 _RESOURCE_CODE_PATTERN = "_({})_".format(
     "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True))
 )
@@ -91,7 +92,7 @@ def _template_generators_new_entrant(
 
     """
     logging.info("Creating a template for new entrant generators")
-    gens = _filter_to_technology_group(new_entrants_summary, "generators")
+    gens = new_entrants_summary[~_is_storage_row(new_entrants_summary)].copy()
     gens = gens.rename(columns=_SUMMARY_COLUMN_RENAMES)
     gens = _set_geo_id(gens)
     gens = _add_resource_type(gens)
@@ -119,7 +120,7 @@ def _template_storage_new_entrant(
         N3 Battery  Battery (2hrs)  N3      Battery
     """
     logging.info("Creating a template for new entrant storage")
-    storage = _filter_to_technology_group(new_entrants_summary, "storage")
+    storage = new_entrants_summary[_is_storage_row(new_entrants_summary)].copy()
     storage = storage.rename(columns=_SUMMARY_COLUMN_RENAMES)
     storage = _set_geo_id(storage)
     return storage[_STORAGE_IDENTITY_COLUMNS]
@@ -128,74 +129,13 @@ def _template_storage_new_entrant(
 # --- shared helpers ---
 
 
-def _filter_to_technology_group(
-    new_entrants_summary: pd.DataFrame, group: str
-) -> pd.DataFrame:
-    """Returns the summary rows for one technology group: generators or storage.
-
-    Storage rows are those whose "Technology Type" contains a
-    ``_STORAGE_TECHNOLOGY_STRINGS`` substring (battery, pumped hydro), matched
-    case-insensitively; generators are every other row. The two groups partition
-    the summary, so this single predicate is the only place the generator/storage
-    boundary is defined.
-
-    Args:
-        new_entrants_summary: the IASR ``new_entrants_summary`` table
-        group: "generators" or "storage".
-
-    I/O Example:
-        new_entrants_summary:
-            Technology Type                  REZ ID
-            Wind                             N3
-            Battery Storage (2hrs storage)   N3
-            Pumped Hydro (24hrs storage)     Not Applicable
-            OCGT (small GT)                  Not Applicable
-
-        group="generators" returns:
-            Technology Type                  REZ ID
-            Wind                             N3
-            OCGT (small GT)                  Not Applicable
-
-        group="storage" returns:
-            Technology Type                  REZ ID
-            Battery Storage (2hrs storage)   N3
-            Pumped Hydro (24hrs storage)     Not Applicable
-    """
-    is_storage = _where_any_substring_appears(
-        new_entrants_summary["Technology Type"], _STORAGE_TECHNOLOGY_STRINGS
-    )
-    if group == "storage":
-        return new_entrants_summary.loc[is_storage].reset_index(drop=True)
-    if group == "generators":
-        return new_entrants_summary.loc[~is_storage].reset_index(drop=True)
-    raise ValueError(
-        "Filtering new entrants table to technology group: "
-        f"group must be 'generators' or 'storage', got {group!r}"
-    )
-
-
 def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame:
-    """Sets ``geo_id`` from the row's REZ ID, falling back to its Sub-region.
+    """Adds 'geo_id' column to new_entrants containing REZ ID with Sub-region fallback.
 
-    I/O Example:
-        new_entrants:
-            technology                       REZ ID           Sub-region
-            Wind                             N3               CNSW
-            Large scale Solar PV             N0               CNSW       # Non-REZ: kept as-is
-            OCGT (small GT)                  Not Applicable   NQ
-            Pumped Hydro (24hrs storage)     Not Applicable   SNW
-
-        returns (adds geo_id):
-            technology                       REZ ID           Sub-region  geo_id
-            Wind                             N3               CNSW        N3
-            Large scale Solar PV             N0               CNSW        N0
-            OCGT (small GT)                  Not Applicable   NQ          NQ
-            Pumped Hydro (24hrs storage)     Not Applicable   SNW         SNW
+    Applies ``_pick_location`` helper to each row of the new_entrants table to
+    set their 'geo_id'. Simple wrapper for readability; mutates the input in place.
     """
-    new_entrants = new_entrants.copy()
-    new_entrants["geo_id"] = new_entrants["REZ ID"].where(
-        new_entrants["REZ ID"] != "Not Applicable", new_entrants["Sub-region"]
-    )
+    new_entrants["geo_id"] = new_entrants.apply(_pick_location, axis=1)
     return new_entrants
 
 
diff --git a/tests/test_templater/test_custom_constraints_from_plexos.py b/tests/test_templater/test_custom_constraints_from_plexos.py
index 2c3ae7a9..ce377382 100644
--- a/tests/test_templater/test_custom_constraints_from_plexos.py
+++ b/tests/test_templater/test_custom_constraints_from_plexos.py
@@ -31,12 +31,10 @@
     _generator_to_location,
     _iasr_id_choices,
     _inject_iasr_new_entrant_batteries,
-    _is_battery_row,
     _line_variable_name,
     _location_battery_pairs,
     _log_injected_batteries,
     _match_unit_name,
-    _pick_location,
     _plexos_extract_dir,
     _rename_battery_name,
     _rename_first_token,
@@ -830,42 +828,6 @@ def test_build_custom_constraints_rhs_maps_to_region_prefixed_canonical_timeslic
     pd.testing.assert_frame_equal(result, expected)
 
 
-# --- _is_battery_row ---
-
-
-def test_is_battery_row(csv_str_to_df):
-    new_entrants = csv_str_to_df("""
-        IASR ID / DLT names,  Technology Type
-        Q1 Battery - 2h,       Battery Storage (2hrs storage)
-        NQ Battery - Dist,     Distributed Resources Batteries
-        Q1 Wind,                 Wind
-        N1 Pumped Hydro - 24h,Pumped Hydro (24hrs storage)
-        Q1 Solar Thermal,       Solar Thermal (16hrs storage)
-    """)
-
-    result = _is_battery_row(new_entrants)
-
-    # Battery + Distributed Resources Batteries match; others (incl. pumped
-    # hydro and solar thermal storage) do not.
-    assert list(result) == [True, True, False, False, False]
-
-
-# --- _pick_location ---
-
-
-@pytest.mark.parametrize(
-    "rez_id, sub_region, expected",
-    [
-        ("Q8", "SQ", "Q8"),  # REZ ID populated -> REZ ID
-        ("Not Applicable", "SQ", "SQ"),  # 'Not Applicable' -> Sub-region
-        (None, "SQ", "SQ"),  # NaN/None -> Sub-region
-    ],
-)
-def test_pick_location(rez_id, sub_region, expected):
-    row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region})
-    assert _pick_location(row) == expected
-
-
 # --- _generator_to_location ---
 
 
diff --git a/tests/test_templater/test_helpers.py b/tests/test_templater/test_helpers.py
index 2696aa3c..6d474501 100644
--- a/tests/test_templater/test_helpers.py
+++ b/tests/test_templater/test_helpers.py
@@ -2,8 +2,12 @@
 import pytest
 
 from ispypsa.templater.helpers import (
+    _is_battery_row,
+    _is_pumped_hydro_row,
+    _is_storage_row,
     _looks_like_financial_year,
     _manual_remove_footnotes_from_generator_names,
+    _pick_location,
     _rez_name_to_id_mapping,
     _snakecase_string,
     _standardise_storage_capitalisation,
@@ -373,3 +377,78 @@ def test_looks_like_financial_year_matches_only_canonical_formats():
     assert _looks_like_financial_year("24-25") is False
     assert _looks_like_financial_year("Status") is False
     assert _looks_like_financial_year("Flow path") is False
+
+
+# --- _pick_location ---
+
+
+@pytest.mark.parametrize(
+    "rez_id, sub_region, expected",
+    [
+        ("Q8", "SQ", "Q8"),  # REZ ID populated -> REZ ID
+        ("Not Applicable", "SQ", "SQ"),  # 'Not Applicable' -> Sub-region
+        (None, "SQ", "SQ"),  # NaN/None -> Sub-region
+    ],
+)
+def test_pick_location(rez_id, sub_region, expected):
+    row = pd.Series({"REZ ID": rez_id, "Sub-region": sub_region})
+    assert _pick_location(row) == expected
+
+
+# --- _is_battery_row ---
+
+
+def test_is_battery_row(csv_str_to_df):
+    new_entrants = csv_str_to_df("""
+        IASR ID / DLT names,    Technology Type
+        Q1 Battery - 2h,        Battery Storage (2hrs storage)
+        NQ Battery - Dist,      Distributed Resources Batteries
+        Q1 Wind,                Wind
+        N1 Pumped Hydro - 24h,  Pumped Hydro (24hrs storage)
+        Q1 Solar Thermal,       Solar Thermal (16hrs storage)
+    """)
+
+    result = _is_battery_row(new_entrants)
+
+    # Battery + Distributed Resources Batteries match; others (incl. pumped
+    # hydro and solar thermal storage) do not.
+    assert list(result) == [True, True, False, False, False]
+
+
+# --- _is_pumped_hydro_row ---
+
+
+def test_is_pumped_hydro_row(csv_str_to_df):
+    new_entrants = csv_str_to_df("""
+        IASR ID / DLT names,    Technology Type
+        Q1 Battery - 2h,        Battery Storage (2hrs storage)
+        NQ Battery - Dist,      Distributed Resources Batteries
+        Q1 Wind,                Wind
+        N1 Pumped Hydro - 24h,  Pumped Hydro (24hrs storage)
+        Q1 Solar Thermal,       Solar Thermal (16hrs storage)
+    """)
+
+    result = _is_pumped_hydro_row(new_entrants)
+
+    # Only Pumped Hydro rows match; batteries and other storage types do not.
+    assert list(result) == [False, False, False, True, False]
+
+
+# --- _is_storage_row ---
+
+
+def test_is_storage_row(csv_str_to_df):
+    new_entrants = csv_str_to_df("""
+        IASR ID / DLT names,    Technology
+        Q1 Battery - 2h,        Battery Storage (2hrs storage)
+        NQ Battery - Dist,      Distributed Resources Batteries
+        Q1 Wind,                Wind
+        N1 Pumped Hydro - 24h,  Pumped Hydro (24hrs storage)
+        Q1 Solar Thermal,       Solar Thermal (16hrs storage)
+    """)
+
+    result = _is_storage_row(new_entrants, col_to_check="Technology")
+
+    # Battery, Distributed Resources Batteries and Pumped Hydro all match.
+    # Solar Thermal still does not.
+    assert list(result) == [True, True, False, True, False]
diff --git a/tests/test_templater/test_new_entrants.py b/tests/test_templater/test_new_entrants.py
index f211f4f2..2b32d386 100644
--- a/tests/test_templater/test_new_entrants.py
+++ b/tests/test_templater/test_new_entrants.py
@@ -5,7 +5,6 @@
     _GENERATOR_IDENTITY_COLUMNS,
     _STORAGE_IDENTITY_COLUMNS,
     _add_resource_type,
-    _filter_to_technology_group,
     _set_geo_id,
     _template_generators_new_entrant,
     _template_storage_new_entrant,
@@ -55,82 +54,16 @@ def test_template_storage_new_entrant(csv_str_to_df):
     assert len(result) == 3
 
 
-# --- _filter_to_technology_group ---
-
-
-def test_filter_to_technology_group(csv_str_to_df):
-    # test core split/filter function returns both groups correctly
-    new_entrants_summary = csv_str_to_df("""
-        Technology Type,                    REZ ID
-        Wind,                               N3
-        Large scale Solar PV,               N3
-        Battery Storage (2hrs storage),     N3
-        Distributed Resources Batteries,    Not Applicable
-        Pumped Hydro (24hrs storage),       Not Applicable
-        OCGT (small GT),                    Not Applicable
-    """)
-
-    # All storage variants (batteries, distributed batteries, pumped hydro) are
-    # dropped; generation rows pass through unchanged with other columns intact.
-    generators = _filter_to_technology_group(new_entrants_summary, "generators")
-
-    expected_gens = csv_str_to_df("""
-        Technology Type,        REZ ID
-        Wind,                   N3
-        Large scale Solar PV,   N3
-        OCGT (small GT),        Not Applicable
-    """)
-    pd.testing.assert_frame_equal(generators, expected_gens)
-
-    # Only storage variants (batteries, distributed batteries, pumped hydro) are
-    # kept - unchanged, with other columns intact.
-    storage = _filter_to_technology_group(new_entrants_summary, "storage")
-
-    expected_storage = csv_str_to_df("""
-        Technology Type,                    REZ ID
-        Battery Storage (2hrs storage),     N3
-        Distributed Resources Batteries,    Not Applicable
-        Pumped Hydro (24hrs storage),       Not Applicable
-    """)
-    pd.testing.assert_frame_equal(storage, expected_storage)
-
-
-def test_filter_to_technology_group_raises_unknown_group(csv_str_to_df, caplog):
-    # Raises on non-permitted 'group' arg (not "generators" or "storage")
-    new_entrants_summary = csv_str_to_df("""
-        Technology Type,                    REZ ID
-        Wind,                               N3
-        Pumped Hydro (24hrs storage),       Not Applicable
-    """)
-
-    with pytest.raises(ValueError, match="group must be 'generators' or 'storage'"):
-        _filter_to_technology_group(new_entrants_summary, "computers")
-
-
-def test_filter_to_technology_group_empty_input(csv_str_to_df):
-    # Empty input (all columns, no rows) returns an empty frame, no errors.
-    new_entrants_summary = pd.DataFrame(columns=["Technology Type", "REZ ID"])
-
-    result = _filter_to_technology_group(new_entrants_summary, "storage")
-
-    expected = csv_str_to_df("""
-        Technology Type, REZ ID
-    """)
-    pd.testing.assert_frame_equal(result, expected, check_dtype=False)
-
-
 # --- _set_geo_id ---
 
 
 def test_set_geo_id(csv_str_to_df):
-    # REZ-located rows take their REZ ID (incl. Non-REZ N0/V0); thermal, pumped hydro
-    # and distributed rows ("Not Applicable") fall back to their Sub-region.
+    # Check that the wrapper adds 'geo_id' column, correctly applying ``_pick_location``
+    # and not impacting existing columns.
     new_entrants = csv_str_to_df("""
         technology,                     REZ ID,         Sub-region
         Wind,                           N3,             CNSW
-        Large scale Solar PV,           N0,             CNSW
         OCGT (small GT),                Not Applicable, NQ
-        Pumped Hydro (24hrs storage),   Not Applicable, SNW
     """)
 
     result = _set_geo_id(new_entrants)
@@ -138,15 +71,13 @@ def test_set_geo_id(csv_str_to_df):
     expected = csv_str_to_df("""
         technology,                     REZ ID,         Sub-region, geo_id
         Wind,                           N3,             CNSW,       N3
-        Large scale Solar PV,           N0,             CNSW,       N0
         OCGT (small GT),                Not Applicable, NQ,         NQ
-        Pumped Hydro (24hrs storage),   Not Applicable, SNW,        SNW
     """)
     pd.testing.assert_frame_equal(result, expected)
 
 
 def test_set_geo_id_empty_input(csv_str_to_df):
-    # Empty input still returns the geo_id column (all columns, no rows).
+    # Empty input still returns the added geo_id column
     new_entrants = pd.DataFrame(columns=["technology", "REZ ID", "Sub-region"])
 
     result = _set_geo_id(new_entrants)