Open-ISP · EllieKallmier · Jun 24, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 19, 2026
diff --git a/.gitignore b/.gitignore
@@ -191,6 +191,7 @@ notes/
 .DS_Store
 # ignore claude stuff
 .claude/settings.local.json
+CLAUDE.local.md
 
 # ignore all pypsa output files
 *.nc
diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py
@@ -30,6 +30,10 @@
     _filter_flow_path_augmentations_to_granularity,
     _template_network_expansion,
 )
+from ispypsa.templater.new_entrants import (
+    _template_generators_new_entrant,
+    _template_storage_new_entrant,
+)
 from ispypsa.templater.nodes import (
     _template_regions,
     _template_sub_regions,
@@ -220,10 +224,6 @@ def create_ispypsa_inputs_template(
         template["network_expansion_options"] = expansion_options
         template["network_transmission_path_expansion_costs"] = expansion_costs
 
-        # todo: replace with actual generators_new_entrant once that templating
-        # function is written — passing empty placeholder for now so costs_connection
-        # is wired up but produces no VRE rows until generators are templated.
-
         # connection_capacity_non_vre is in manually_extracted_template_tables/ (sourced from
         # ENOR tables 16-17 and confirmed with AEMO) but is needed as an iasr_tables input,
         # not a template output. TODO revisit when more manual tables added and consider
@@ -232,8 +232,13 @@ def create_ispypsa_inputs_template(
             "connection_capacity_non_vre"
         ].copy()
 
-        generators_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
-        storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
+        # Identity columns only for now - not yet a templater output
+        generators_new_entrant = _template_generators_new_entrant(
+            iasr_tables["new_entrants_summary"]
+        )
+        storage_new_entrant = _template_storage_new_entrant(
+            iasr_tables["new_entrants_summary"]
+        )
         template["costs_connection"] = _template_connection_costs(
             iasr_tables,
             scenario,

diff --git a/src/ispypsa/templater/custom_constraints_from_plexos.py b/src/ispypsa/templater/custom_constraints_from_plexos.py
@@ -176,6 +176,7 @@
 
 import pandas as pd
 
+from .helpers import _is_battery_row, _pick_location
 from .mappings import _CANONICAL_TIMESLICES
 
 # PLEXOS REZ-id prefixes that IASR renamed to DREZ. Applied to the first
@@ -924,30 +925,6 @@ def _battery_to_location(new_entrants: pd.DataFrame) -> dict[str, str]:
     return dict(zip(batteries["IASR ID / DLT names"], locations))
 
 
-def _is_battery_row(new_entrants: pd.DataFrame) -> pd.Series:
-    """Boolean mask selecting battery rows in new_entrants_summary.
-
-    Matches any Technology Type that contains the literal substring
-    "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
-    and "Distributed Resources Batteries" (plural). Other storage
-    technologies (pumped hydro, solar thermal) intentionally do not match.
-    """
-    return new_entrants["Technology Type"].str.contains("Batter", na=False)
-
-
-def _pick_location(row: pd.Series) -> str:
-    """Return REZ ID when populated, otherwise Sub-region.
-
-    I/O Example:
-        {"REZ ID": "Q8",             "Sub-region": "SQ"}  -> "Q8"
-        {"REZ ID": "Not Applicable", "Sub-region": "SQ"}  -> "SQ"
-    """
-    rez_id = row["REZ ID"]
-    if pd.notna(rez_id) and rez_id != "Not Applicable":
-        return rez_id
-    return row["Sub-region"]
-
-
 def _triggered_locations_per_constraint(
     lhs: pd.DataFrame, unit_to_location: dict[str, str]
 ) -> pd.DataFrame:

diff --git a/src/ispypsa/templater/helpers.py b/src/ispypsa/templater/helpers.py
@@ -384,6 +384,51 @@ def _strip_all_text_after_numeric_value(
     return series
 
 
+def _pick_location(row: pd.Series) -> str:
+    """Return a technology's REZ ID when populated, otherwise Sub-region.
+
+    I/O Example:
+        {"REZ ID": "Q8",             "Sub-region": "SQ"}  -> "Q8"
+        {"REZ ID": "Not Applicable", "Sub-region": "SQ"}  -> "SQ"
+    """
+    rez_id = row["REZ ID"]
+    if pd.notna(rez_id) and rez_id != "Not Applicable":
+        return rez_id
+    return row["Sub-region"]
+
+
+def _is_battery_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Boolean mask selecting battery technology rows in ``df``.
+
+    Matches any ``col_to_check`` row that contains the literal substring
+    "Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
+    and "Distributed Resources Batteries" (plural). Other storage
+    technologies (pumped hydro, solar thermal) intentionally do not match.
+    """
+    return df[col_to_check].str.contains("Batter", na=False)
+
+
+def _is_pumped_hydro_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Boolean mask selecting pumped hydro technology rows in ``df``.
+
+    Matches any ``col_to_check`` row that contains the literal substring
+    "Pumped Hydro" -- covering all durations. Other storage technologies
+    (batteries, solar thermal) intentionally do not match.
+    """
+    return df[col_to_check].str.contains("Pumped Hydro", na=False)
+
+
+def _is_storage_row(
+    df: pd.DataFrame, col_to_check: str = "Technology Type"
+) -> pd.Series:
+    """Wrapper that returns union of ``_is_battery_row`` and ``_is_pumped_hydro_row``."""
+    return _is_battery_row(df, col_to_check) | _is_pumped_hydro_row(df, col_to_check)
+
+
 def _standardise_storage_capitalisation(series: pd.Series) -> pd.Series:
     """
     Standardises capitalisation of "storage" in a pandas Series.

diff --git a/src/ispypsa/templater/new_entrants.py b/src/ispypsa/templater/new_entrants.py
@@ -0,0 +1,177 @@
+"""Templates the new entrant generator and storage identity tables.
+
+Both tables are currently built from a single IASR input, the ``new_entrants_summary``
+table. This module splits that table into its two subsets and shapes each into the
+identity columns of its target schema (see schemas/generators_new_entrant.yaml and
+schemas/storage_new_entrant.yaml).
+
+There are two independent public orchestrators, one per output table, each taking
+the full summary. They share the same shape:
+    1. Filter the summary to the relevant technology group
+    2. Rename the carried-over summary columns to their schema names
+    3. Derive geo_id
+    4. (Generators only) Derive resource_type
+    5. Select the table's group-specific identity columns.
+"""
+
+import logging
+
+import pandas as pd
+
+from ispypsa.templater.helpers import (
+    _is_storage_row,
+    _pick_location,
+)
+
+_GENERATOR_IDENTITY_COLUMNS = [
+    "name",
+    "technology",
+    "resource_type",
+    "geo_id",
+    "fuel_type",
+    "fuel_price_mapping",
+]
+
+_STORAGE_IDENTITY_COLUMNS = [
+    "name",
+    "technology",
+    "geo_id",
+    "fuel_type",
+]
+
+# Source (IASR new_entrants_summary) column names → schema output column names.
+_SUMMARY_COLUMN_RENAMES = {
+    "IASR ID / DLT names": "name",
+    "Technology Type": "technology",
+    "Fuel type": "fuel_type",
+    "Fuel cost mapping": "fuel_price_mapping",
+}
+
+# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
+# mapping for it if/when resource_limits templating requires one.
+_RESOURCE_QUALITY_CODE_TO_TYPE = {
+    "WH": "wind_high",
+    "WM": "wind_medium",
+    "WFX": "wind_offshore_fixed",
+    "WFL": "wind_offshore_floating",
+    "SAT": "solar",
+    "CST": "solar",
+}
+
+# Regex extracting the resource-quality code embedded between underscores in a VRE IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast".
+# Derived from the code map, it expands to "_(WFX|WFL|SAT|...)_" — one capture group over the known codes, sorted longest-first so a short code can't shadow a longer one it prefixes.
+_RESOURCE_CODE_PATTERN = "_({})_".format(
+    "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True))
+)
+
+
+# --- public orchestrators ---
+
+
+# NOTE: partial scope intentional - other columns to be added in next PRs!
+def _template_generators_new_entrant(
+    new_entrants_summary: pd.DataFrame,
+) -> pd.DataFrame:
+    """Templates the new entrant generators identity table from the IASR summary.
+
+    Keeps only generator rows, renames the carried-over summary columns to schema
+    names, derives geo_id (REZ ID or sub-region) and resource_type (from the VRE
+    resource code in the IASR ID), and returns the identity columns.
+
+    I/O Example:
+        new_entrants_summary (abbr.):
+            IASR ID     Power Station   Technology Type REZ ID          Sub-region  Fuel type   Fuel cost mapping
+            N3_WH_rez   N3_WH_rez       Wind            N3              NNSW        Wind        Wind
+            N3 Battery  N3 Battery      Battery (2hrs)  N3              NNSW        Battery     Battery
+            SQ CCGT     SQ CCGT         CCGT            Not Applicable  SQ          Gas         QLD new CCGT
+
+    Returns:
+        name        technology  resource_type   geo_id  fuel_type   fuel_price_mapping
+        N3_WH_rez   Wind        wind_high       N3      Wind        Wind
+        SQ CCGT     CCGT        NaN             SQ      Gas         QLD new CCGT
+
+    """
+    logging.info("Creating a template for new entrant generators")
+    gens = new_entrants_summary[~_is_storage_row(new_entrants_summary)].copy()
+    gens = gens.rename(columns=_SUMMARY_COLUMN_RENAMES)
+    gens = _set_geo_id(gens)
+    gens = _add_resource_type(gens)
+    return gens[_GENERATOR_IDENTITY_COLUMNS]
+
+
+# NOTE: partial scope intentional - other columns to be added in next PRs!
+def _template_storage_new_entrant(
+    new_entrants_summary: pd.DataFrame,
+) -> pd.DataFrame:
+    """Templates the new entrant storage identity table from the IASR summary.
+
+    Keeps only storage rows, renames the carried-over summary columns to schema
+    names, derives geo_id (REZ ID or sub-region), and returns the identity columns.
+
+    I/O Example:
+        new_entrants_summary (abbr.):
+            IASR ID     Power Station   Technology Type REZ ID          Sub-region  Fuel type   Fuel cost mapping
+            N3_WH_rez   N3_WH_rez       Wind            N3              NNSW        Wind        Wind
+            N3 Battery  N3 Battery      Battery (2hrs)  N3              NNSW        Battery     Battery
+            SQ CCGT     SQ CCGT         CCGT            Not Applicable  SQ          Gas         QLD new CCGT
+
+    Returns:
+        name        technology      geo_id  fuel_type
+        N3 Battery  Battery (2hrs)  N3      Battery
+    """
+    logging.info("Creating a template for new entrant storage")
+    storage = new_entrants_summary[_is_storage_row(new_entrants_summary)].copy()
+    storage = storage.rename(columns=_SUMMARY_COLUMN_RENAMES)
+    storage = _set_geo_id(storage)
+    return storage[_STORAGE_IDENTITY_COLUMNS]
+
+
+# --- shared helpers ---
+
+
+def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame:
+    """Adds 'geo_id' column to new_entrants containing REZ ID with Sub-region fallback.
+
+    Applies ``_pick_location`` to each row of the new_entrants table to set its
+    'geo_id'. The column is added to the passed frame in place, which is returned.
+    """
+    new_entrants["geo_id"] = new_entrants.apply(_pick_location, axis=1)
+    return new_entrants
+
+
+# --- generator-specific helpers ---
+
+
+def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
+    """Adds the VRE ``resource_type`` column from the resource code in ``name``.
+
+    VRE IASR IDs embed a resource-quality code between underscores — e.g. the "WH"
+    in "Q1_WH_Far North QLD". The code is extracted and mapped via
+    ``_RESOURCE_QUALITY_CODE_TO_TYPE``. IDs with no matching code — the underscore-
+    free thermal and distributed-resource rows — get NaN.
+
+    I/O Example:
+        gens:
+            name                              technology
+            Q1_WH_Far North QLD               Wind
+            Q1_WM_Far North QLD               Wind
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)
+            DREZ_SAT_Dubbo                    Large scale Solar PV
+            N0_CST_NSW                        Solar Thermal (16hrs storage)
+            CNSW SAT - Distributed Resources  Distributed Resources Solar
+            CNSW OCGT Small                   OCGT (small GT)
+
+        returns (adds resource_type):
+            name                              technology                     resource_type
+            Q1_WH_Far North QLD               Wind                           wind_high
+            Q1_WM_Far North QLD               Wind                           wind_medium
+            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
+            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
+            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar  # CST -> solar
+            CNSW SAT - Distributed Resources  Distributed Resources Solar    NaN  # no _ token
+            CNSW OCGT Small                   OCGT (small GT)                NaN  # no _ token
+    """
+    gens = gens.copy()
+    resource_code = gens["name"].str.extract(_RESOURCE_CODE_PATTERN, expand=False)
+    gens["resource_type"] = resource_code.map(_RESOURCE_QUALITY_CODE_TO_TYPE)
+    return gens
diff --git a/src/ispypsa/validation/schemas/storage_new_entrant.yaml b/src/ispypsa/validation/schemas/storage_new_entrant.yaml
@@ -32,14 +32,6 @@ columns:
     type: string
     required: true
     description: Unique identifier for the storage unit (e.g. IASR ID or full name).
-  power_station:
-    type: string
-    required: true
-    description: >
-      Power station name grouping storage units together.
-
-      For new entrant storage units, this is always the same as the `name` field
-      (i.e, no grouping is performed). Keeping for consistency with existing storage tables.
   technology:
     type: string
     required: true

diff --git a/tests/test_iasr_table_caching/test_local_cache.py b/tests/test_iasr_table_caching/test_local_cache.py
@@ -27,6 +27,8 @@ def test_build_required_tables_new_format():
     assert "connection_costs_for_wind_and_solar" in result
     assert "connection_costs_other" in result
     assert "efficient_level_of_system_strength_cost" in result
+    # new_entrants_summary feeds the new entrant generator and storage templaters
+    assert "new_entrants_summary" in result
 
 
 def test_build_required_tables_old_format():