Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ notes/
.DS_Store
# ignore claude stuff
.claude/settings.local.json
CLAUDE.local.md

# ignore all pypsa output files
*.nc
17 changes: 11 additions & 6 deletions src/ispypsa/templater/create_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
_filter_flow_path_augmentations_to_granularity,
_template_network_expansion,
)
from ispypsa.templater.new_entrants import (
_template_generators_new_entrant,
_template_storage_new_entrant,
)
from ispypsa.templater.nodes import (
_template_regions,
_template_sub_regions,
Expand Down Expand Up @@ -220,10 +224,6 @@ def create_ispypsa_inputs_template(
template["network_expansion_options"] = expansion_options
template["network_transmission_path_expansion_costs"] = expansion_costs

# todo: replace with actual generators_new_entrant once that templating
# function is written — passing empty placeholder for now so costs_connection
# is wired up but produces no VRE rows until generators are templated.

# connection_capacity_non_vre is in manually_extracted_template_tables/ (sourced from
# ENOR tables 16-17 and confirmed with AEMO) but is needed as an iasr_tables input,
# not a template output. TODO revisit when more manual tables added and consider
Expand All @@ -232,8 +232,13 @@ def create_ispypsa_inputs_template(
"connection_capacity_non_vre"
].copy()

generators_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
storage_new_entrant = pd.DataFrame(columns=["geo_id", "technology"])
# Identity columns only for now - not yet a templater output
generators_new_entrant = _template_generators_new_entrant(
iasr_tables["new_entrants_summary"]
)
storage_new_entrant = _template_storage_new_entrant(
iasr_tables["new_entrants_summary"]
)
template["costs_connection"] = _template_connection_costs(
iasr_tables,
scenario,
Expand Down
25 changes: 1 addition & 24 deletions src/ispypsa/templater/custom_constraints_from_plexos.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@

import pandas as pd

from .helpers import _is_battery_row, _pick_location
from .mappings import _CANONICAL_TIMESLICES

# PLEXOS REZ-id prefixes that IASR renamed to DREZ. Applied to the first
Expand Down Expand Up @@ -924,30 +925,6 @@ def _battery_to_location(new_entrants: pd.DataFrame) -> dict[str, str]:
return dict(zip(batteries["IASR ID / DLT names"], locations))


def _is_battery_row(new_entrants: pd.DataFrame) -> pd.Series:
"""Boolean mask selecting battery rows in new_entrants_summary.

Matches any Technology Type that contains the literal substring
"Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
and "Distributed Resources Batteries" (plural). Other storage
technologies (pumped hydro, solar thermal) intentionally do not match.
"""
return new_entrants["Technology Type"].str.contains("Batter", na=False)


def _pick_location(row: pd.Series) -> str:
"""Return REZ ID when populated, otherwise Sub-region.

I/O Example:
{"REZ ID": "Q8", "Sub-region": "SQ"} -> "Q8"
{"REZ ID": "Not Applicable", "Sub-region": "SQ"} -> "SQ"
"""
rez_id = row["REZ ID"]
if pd.notna(rez_id) and rez_id != "Not Applicable":
return rez_id
return row["Sub-region"]


def _triggered_locations_per_constraint(
lhs: pd.DataFrame, unit_to_location: dict[str, str]
) -> pd.DataFrame:
Expand Down
45 changes: 45 additions & 0 deletions src/ispypsa/templater/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,51 @@ def _strip_all_text_after_numeric_value(
return series


def _pick_location(row: pd.Series) -> str:
"""Return a technology's REZ ID when populated, otherwise Sub-region.

I/O Example:
{"REZ ID": "Q8", "Sub-region": "SQ"} -> "Q8"
{"REZ ID": "Not Applicable", "Sub-region": "SQ"} -> "SQ"
"""
rez_id = row["REZ ID"]
if pd.notna(rez_id) and rez_id != "Not Applicable":
return rez_id
return row["Sub-region"]


def _is_battery_row(
df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
"""Boolean mask selecting battery technology rows in ``df``.

Matches any ``col_to_check`` row that contains the literal substring
"Batter" -- covers both "Battery Storage (Xhrs storage)" (singular)
and "Distributed Resources Batteries" (plural). Other storage
technologies (pumped hydro, solar thermal) intentionally do not match.
"""
return df[col_to_check].str.contains("Batter", na=False)


def _is_pumped_hydro_row(
df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
"""Boolean mask selecting pumped hydro technology rows in ``df``.

Matches any ``col_to_check`` row that contains the literal substring
"Pumped Hydro" -- covering all durations. Other storage technologies
(batteries, solar thermal) intentionally do not match.
"""
return df[col_to_check].str.contains("Pumped Hydro", na=False)


def _is_storage_row(
    df: pd.DataFrame, col_to_check: str = "Technology Type"
) -> pd.Series:
    """Boolean mask selecting rows that are either battery or pumped hydro.

    Combines ``_is_battery_row`` and ``_is_pumped_hydro_row`` with an
    element-wise OR over the same ``col_to_check`` column.
    """
    battery_mask = _is_battery_row(df, col_to_check)
    pumped_hydro_mask = _is_pumped_hydro_row(df, col_to_check)
    return battery_mask | pumped_hydro_mask


def _standardise_storage_capitalisation(series: pd.Series) -> pd.Series:
"""
Standardises capitalisation of "storage" in a pandas Series.
Expand Down
177 changes: 177 additions & 0 deletions src/ispypsa/templater/new_entrants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Templates the new entrant generator and storage identity tables.

Both tables are currently built from a single IASR input, the ``new_entrants_summary``
table. This module splits that table into its two subsets and shapes each into the
identity columns of its target schema (see schemas/generators_new_entrant.yaml and
schemas/storage_new_entrant.yaml).

There are two independent public orchestrators, one per output table, each taking
the full summary. They share the same shape:
1. Filter the summary to the relevant technology group
2. Rename the carried-over summary columns to their schema names
3. Derive geo_id
4. (Generators only) Derive resource_type
5. Select the table's group-specific identity columns.
"""

import logging

import pandas as pd

from ispypsa.templater.helpers import (
_is_storage_row,
_pick_location,
)

# Columns (and their order) returned by the generators identity table.
_GENERATOR_IDENTITY_COLUMNS = [
    "name",
    "technology",
    "resource_type",
    "geo_id",
    "fuel_type",
    "fuel_price_mapping",
]

# Columns (and their order) returned by the storage identity table.
_STORAGE_IDENTITY_COLUMNS = [
    "name",
    "technology",
    "geo_id",
    "fuel_type",
]

# Source (IASR new_entrants_summary) column names → schema output column names.
# Applied to both tables; storage simply does not select "fuel_price_mapping".
_SUMMARY_COLUMN_RENAMES = {
    "IASR ID / DLT names": "name",
    "Technology Type": "technology",
    "Fuel type": "fuel_type",
    "Fuel cost mapping": "fuel_price_mapping",
}

# Resource-quality code (as embedded in a VRE IASR ID) → resource_type value.
# TODO(revisit): Distributed Resources Solar currently gets no resource_type; add a
# mapping for it if/when resource_limits templating requires one.
_RESOURCE_QUALITY_CODE_TO_TYPE = {
    "WH": "wind_high",
    "WM": "wind_medium",
    "WFX": "wind_offshore_fixed",
    "WFL": "wind_offshore_floating",
    "SAT": "solar",
    "CST": "solar",
}

# Regex extracting the resource-quality code embedded between underscores in a VRE
# IASR ID, e.g. "WFX" in "N10_WFX_Hunter Coast". Derived from the code map, it
# expands to "_(WFX|WFL|SAT|...)_" — one capture group over the known codes,
# sorted longest-first so a short code can't shadow a longer one it prefixes.
_RESOURCE_CODE_PATTERN = "_({})_".format(
    "|".join(sorted(_RESOURCE_QUALITY_CODE_TO_TYPE, key=len, reverse=True))
)


# --- public orchestrators ---


# NOTE: partial scope intentional - other columns to be added in next PRs!
def _template_generators_new_entrant(
    new_entrants_summary: pd.DataFrame,
) -> pd.DataFrame:
    """Build the new entrant generators identity table from the IASR summary.

    Steps, in order:
        1. Drop every storage row (as identified by ``_is_storage_row``).
        2. Rename the summary columns listed in ``_SUMMARY_COLUMN_RENAMES``.
        3. Add ``geo_id`` via ``_set_geo_id``.
        4. Add ``resource_type`` via ``_add_resource_type``.
        5. Keep only ``_GENERATOR_IDENTITY_COLUMNS``.

    I/O Example:
        new_entrants_summary (abbr.):
            IASR ID      Power Station  Technology Type  REZ ID          Sub-region  Fuel type  Fuel cost mapping
            N3_WH_rez    N3_WH_rez      Wind             N3              NNSW        Wind       Wind
            N3 Battery   N3 Battery     Battery (2hrs)   N3              NNSW        Battery    Battery
            SQ CCGT      SQ CCGT        CCGT             Not Applicable  SQ          Gas        QLD new CCGT

        Returns:
            name        technology  resource_type  geo_id  fuel_type  fuel_price_mapping
            N3_WH_rez   Wind        wind_high      N3      Wind       Wind
            SQ CCGT     CCGT        NaN            SQ      Gas        QLD new CCGT
    """
    logging.info("Creating a template for new entrant generators")
    is_generator = ~_is_storage_row(new_entrants_summary)
    # Work on a copy so the caller's summary table is left untouched.
    generators = new_entrants_summary[is_generator].copy()
    renamed = generators.rename(columns=_SUMMARY_COLUMN_RENAMES)
    located = _set_geo_id(renamed)
    typed = _add_resource_type(located)
    return typed[_GENERATOR_IDENTITY_COLUMNS]


# NOTE: partial scope intentional - other columns to be added in next PRs!
def _template_storage_new_entrant(
    new_entrants_summary: pd.DataFrame,
) -> pd.DataFrame:
    """Build the new entrant storage identity table from the IASR summary.

    Steps, in order:
        1. Keep only the storage rows (as identified by ``_is_storage_row``).
        2. Rename the summary columns listed in ``_SUMMARY_COLUMN_RENAMES``.
        3. Add ``geo_id`` via ``_set_geo_id``.
        4. Keep only ``_STORAGE_IDENTITY_COLUMNS``.

    I/O Example:
        new_entrants_summary (abbr.):
            IASR ID      Power Station  Technology Type  REZ ID          Sub-region  Fuel type  Fuel cost mapping
            N3_WH_rez    N3_WH_rez      Wind             N3              NNSW        Wind       Wind
            N3 Battery   N3 Battery     Battery (2hrs)   N3              NNSW        Battery    Battery
            SQ CCGT      SQ CCGT        CCGT             Not Applicable  SQ          Gas        QLD new CCGT

        Returns:
            name        technology      geo_id  fuel_type
            N3 Battery  Battery (2hrs)  N3      Battery
    """
    logging.info("Creating a template for new entrant storage")
    is_storage = _is_storage_row(new_entrants_summary)
    # Work on a copy so the caller's summary table is left untouched.
    storage_units = new_entrants_summary[is_storage].copy()
    renamed = storage_units.rename(columns=_SUMMARY_COLUMN_RENAMES)
    located = _set_geo_id(renamed)
    return located[_STORAGE_IDENTITY_COLUMNS]


# --- shared helpers ---


def _set_geo_id(new_entrants: pd.DataFrame) -> pd.DataFrame:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somewhat duplicates custom_constraints_from_plexos.py:938 _pick_location. We could extract it to a shared helper, but it doesn't concern me too much.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also guard against a future REZ ID being NaN rather than "Not Applicable". That goes against our "let's just assume the templater inputs are fixed" principle; on the other hand, it's a cheap bit of robustness.

"""Adds 'geo_id' column to new_entrants containing REZ ID with Sub-region fallback.

Applies ``_pick_location`` helper to each row of the new_entrants table to
set their 'geo_id'. Simple wrapper for readability.
"""
new_entrants["geo_id"] = new_entrants.apply(_pick_location, axis=1)
return new_entrants


# --- generator-specific helpers ---


def _add_resource_type(gens: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``gens`` with a ``resource_type`` column added.

    The value is derived from the resource-quality code that VRE IASR IDs
    carry between underscores in ``name`` (e.g. the "WH" in
    "Q1_WH_Far North QLD"). The code is pulled out with
    ``_RESOURCE_CODE_PATTERN`` and translated through
    ``_RESOURCE_QUALITY_CODE_TO_TYPE``. Names with no matching code, such as
    the underscore-free thermal and distributed-resource rows, get NaN.

    I/O Example:
        gens:
            name                              technology
            Q1_WH_Far North QLD               Wind
            Q1_WM_Far North QLD               Wind
            N10_WFX_Hunter Coast              Wind - offshore (fixed)
            DREZ_SAT_Dubbo                    Large scale Solar PV
            N0_CST_NSW                        Solar Thermal (16hrs storage)
            CNSW SAT - Distributed Resources  Distributed Resources Solar
            CNSW OCGT Small                   OCGT (small GT)

        returns (adds resource_type):
            name                              technology                     resource_type
            Q1_WH_Far North QLD               Wind                           wind_high
            Q1_WM_Far North QLD               Wind                           wind_medium
            N10_WFX_Hunter Coast              Wind - offshore (fixed)        wind_offshore_fixed
            DREZ_SAT_Dubbo                    Large scale Solar PV           solar
            N0_CST_NSW                        Solar Thermal (16hrs storage)  solar   # CST -> solar
            CNSW SAT - Distributed Resources  Distributed Resources Solar    NaN     # no _ token
            CNSW OCGT Small                   OCGT (small GT)                NaN     # no _ token
    """
    # expand=False yields a Series (one capture group) rather than a DataFrame.
    codes = gens["name"].str.extract(_RESOURCE_CODE_PATTERN, expand=False)
    # assign() works on a copy, so the caller's frame is not modified.
    return gens.assign(resource_type=codes.map(_RESOURCE_QUALITY_CODE_TO_TYPE))
8 changes: 0 additions & 8 deletions src/ispypsa/validation/schemas/storage_new_entrant.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ columns:
type: string
required: true
description: Unique identifier for the storage unit (e.g. IASR ID or full name).
power_station:
type: string
required: true
description: >
Power station name grouping storage units together.

For new entrant storage units, this is always the same as the `name` field
(i.e, no grouping is performed). Keeping for consistency with existing storage tables.
technology:
type: string
required: true
Expand Down
2 changes: 2 additions & 0 deletions tests/test_iasr_table_caching/test_local_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def test_build_required_tables_new_format():
assert "connection_costs_for_wind_and_solar" in result
assert "connection_costs_other" in result
assert "efficient_level_of_system_strength_cost" in result
# New entrant generator summary feeds the generators_new_entrant templater
assert "new_entrants_summary" in result


def test_build_required_tables_old_format():
Expand Down
Loading
Loading