Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/ispypsa/iasr_table_caching/local_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ def _build_required_tables(iasr_workbook_version: str) -> list[str]:
"efficient_level_of_system_strength_cost",
"existing_committed_anticipated_additional_generator_summary",
"new_entrants_summary",
"fixed_opex_new_entrants",
"variable_opex_new_entrants",
"lead_time_and_project_life",
"heat_rates_new_entrants",
"gpg_min_stable_level_new_entrants",
"battery_properties",
"pumped_hydro_new_entrant_properties",
] + augmentation

else:
Expand Down
8 changes: 4 additions & 4 deletions src/ispypsa/templater/connection_and_build_costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from ispypsa.templater.helpers import (
_financial_year_string_to_end_year_int,
_fuzzy_map_to_canonical,
_fuzzy_map_to_allowed_values,
_looks_like_financial_year,
_where_any_substring_appears,
)
Expand Down Expand Up @@ -443,7 +443,7 @@ def _canonicalise_non_vre_technologies(
# BOTN - Cethana row dropped (listed in ``_NON_VRE_EXCLUDED_TECHNOLOGIES``)
"""
# NOTE: an only-VRE canonical set (non-empty, but no non-VRE techs) still raises
# via _fuzzy_map_to_canonical when the forecast has non-VRE rows.
# via _fuzzy_map_to_allowed_values when the forecast has non-VRE rows.
# Intentional (for now) — kinda related to https://github.com/Open-ISP/ISPyPSA/discussions/103 and the final role(s) of validator.
if not canonical_technologies:
return pd.DataFrame(columns=df.columns).astype(df.dtypes)
Expand All @@ -452,7 +452,7 @@ def _canonicalise_non_vre_technologies(
df["technology"], _NON_VRE_EXCLUDED_TECHNOLOGIES
)
result = df.loc[~excluded].copy()
result["technology"] = _fuzzy_map_to_canonical(
result["technology"] = _fuzzy_map_to_allowed_values(
result["technology"],
canonical_technologies,
task_desc="canonicalising non-VRE connection cost `technology` values",
Expand Down Expand Up @@ -913,7 +913,7 @@ def _filter_table_by_isp_scenario(
NSW 100 150
"""
table = table.copy()
table[scenario_col_name] = _fuzzy_map_to_canonical(
table[scenario_col_name] = _fuzzy_map_to_allowed_values(
table[scenario_col_name],
_ISP_SCENARIOS_NEW,
task_desc=f"filtering {table_desc} table by ISP scenario",
Expand Down
10 changes: 3 additions & 7 deletions src/ispypsa/templater/create_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,9 @@ def create_ispypsa_inputs_template(
"connection_capacity_non_vre"
].copy()

# Identity columns only for now - not yet a templater output
generators_new_entrant = _template_generators_new_entrant(
iasr_tables["new_entrants_summary"]
)
storage_new_entrant = _template_storage_new_entrant(
iasr_tables["new_entrants_summary"]
)
# Not yet a templater output - fed into connection cost templating below.
generators_new_entrant = _template_generators_new_entrant(iasr_tables)
storage_new_entrant = _template_storage_new_entrant(iasr_tables)
template["costs_connection"] = _template_connection_costs(
iasr_tables,
scenario,
Expand Down
22 changes: 12 additions & 10 deletions src/ispypsa/templater/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _best_fuzzy_match(value: str, choices: Iterable[str], threshold: int) -> str
return best_choice if best_score >= threshold else None


def _fuzzy_map_to_canonical(
def _fuzzy_map_to_allowed_values(
name_series: pd.Series,
choices: Iterable[str],
task_desc: str,
Expand All @@ -163,23 +163,22 @@ def _fuzzy_map_to_canonical(
"""Maps each value in name_series to the closest match in choices (many-to-one).

Unlike _fuzzy_match_names, choices are not consumed — multiple input values can
map to the same canonical name. Successful fuzzy corrections are logged at INFO
map to the same allowed value. Successful fuzzy corrections are logged at INFO
via _log_fuzzy_match. If any values still score below ``threshold`` after
matching, a ValueError is raised.

Args:
name_series: Series of names to map.
choices: Canonical names to match against.
choices: Allowed values to match against.
task_desc: Description included in log messages.
threshold: Minimum fuzz.ratio score (0–100) to accept a replacement. Default 85.

Returns:
Series with values replaced by the closest match where score >= threshold.

Raises:
ValueError: if any unmatched values remain - values that aren't able to be
canonicalised (but are expected to be) flag potential inconsistencies across
datasets.
ValueError: if any values in ``name_series`` do not have a match scoring
at or above ``threshold`` in ``choices``.

I/O Examples:
name_series: ["Step Change", "Step Chaneg", "Step Change"]
Expand All @@ -201,15 +200,18 @@ def _fuzzy_map_to_canonical(
"""
canonical = set(choices)
match_dict = {
v: _best_fuzzy_match(v, canonical, threshold) for v in name_series.unique()
name: _best_fuzzy_match(name, canonical, threshold)
for name in name_series.unique()
}
matched = name_series.map(
lambda v: match_dict[v] if match_dict[v] is not None else v
lambda name: match_dict[name] if match_dict[name] is not None else name
)
_log_fuzzy_match(name_series, matched, task_desc)
unmatched = sorted(k for k, v in match_dict.items() if v is None)
unmatched = sorted(name for name, match in match_dict.items() if match is None)
if unmatched:
msg = f"Could not fuzzy match to a canonical value whilst {task_desc}: {unmatched}"
msg = (
f"Could not fuzzy match to an allowed value whilst {task_desc}: {unmatched}"
)
raise ValueError(msg)
return matched

Expand Down
107 changes: 107 additions & 0 deletions src/ispypsa/templater/mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,113 @@
for opex mapping to rename columns in the table.
"""

# - New-format (flag: use_new_table_format=True) per-technology property merge maps -

# Property entries shared by new entrant generators and storage: each looked up by
# technology from the same IASR table for both.
_COMMON_NEW_ENTRANT_PROPERTY_MAP = {
    # Fixed O&M: the source column is quoted per kW, so the value is multiplied
    # by ``scale`` (1000) when merged.
    "fom": {
        "table": "fixed_opex_new_entrants",
        "technology_col": "Technology Type",
        # NOTE: the doubled closing bracket is deliberate - it is the column name
        # exactly as parsed from the v7.5 IASR workbook.
        "value_col": "Base value ($/kW/year))",
        "scale": 1000.0,
    },
    # Both lifetime properties are read from the same table, different columns.
    "lifetime_technical": {
        "table": "lead_time_and_project_life",
        "technology_col": "Technology",
        "value_col": "Technical life (years)",
    },
    "lifetime_economic": {
        "table": "lead_time_and_project_life",
        "technology_col": "Technology",
        "value_col": "Economic life (years)",
    },
    "minimum_stable_level": {
        "table": "gpg_min_stable_level_new_entrants",
        "technology_col": "Technology",
        "value_col": "Min Stable Level (% of nameplate)",
    },
}

# Generator map = the shared new entrant entries plus the two generator-only
# properties (variable O&M and heat rate).
_GENERATORS_NEW_ENTRANT_PROPERTY_MAP = {
    **_COMMON_NEW_ENTRANT_PROPERTY_MAP,
    "vom": {
        "table": "variable_opex_new_entrants",
        # This table labels its technology column "Generator".
        "technology_col": "Generator",
        "value_col": "Base value",
    },
    "heat_rate": {
        "table": "heat_rates_new_entrants",
        "technology_col": "Technology",
        "value_col": "Heat rate (GJ/MWh)",
    },
}

# Every battery property is read from the same IASR table and keyed on the same
# technology column; only the value column differs from one property to the next.
_STORAGE_BATTERY_PROPERTY_MAP = {
    property_name: {
        "table": "battery_properties",
        "technology_col": "Technology",
        "value_col": value_column,
    }
    for property_name, value_column in (
        ("storage_hours", "Energy capacity_Hours"),
        ("efficiency_charge", "Charge efficiency_%"),
        ("efficiency_discharge", "Discharge efficiency_%"),
        ("soc_max", "Allowable max state of charge_%"),
        ("soc_min", "Allowable min state of charge_%"),
        ("degradation_annual", "Annual degradation_%"),
    )
}

# PHES properties are keyed by "Power Station / Technology" — usually the technology, but
# 'BOTN - Cethana' carries its own row.
_STORAGE_PHES_PROPERTY_MAP = {
    "storage_hours": {
        "table": "pumped_hydro_new_entrant_properties",
        "technology_col": "Power Station / Technology",
        "value_col": "Storage capacity (hours)",
    },
    # NOTE(review): the property is named round-trip efficiency but is sourced from
    # a column labelled "Pumping efficiency (%)" - confirm they are the same quantity.
    "round_trip_efficiency": {
        "table": "pumped_hydro_new_entrant_properties",
        "technology_col": "Power Station / Technology",
        "value_col": "Pumping efficiency (%)",
    },
}
"""
New entrant property columns (keys) mapped to the IASR table and columns that contain
property values and the technology for which the values apply. Consumed by
``ispypsa.templater.new_entrants`` via ``_merge_properties``.

`table`: IASR table name holding the named property (key)
`technology_col`: column in the IASR table that contains the 'technology' string.
This is the column used to merge on (after mapping to canonical values).
`value_col`: column holding the value to merge in
`scale`: amount by which to multiply the value (default 1.0), used for unit
conversions. e.g. 1000 for $/kW → $/MW

``_COMMON_...`` holds the entries shared by generators and storage. The generator map
spreads it in directly; the battery and PHES maps hold only storage-specific entries,
and the storage orchestrator merges the common entries onto the combined battery +
PHES rows.
"""

_ECAA_STORAGE_STATIC_PROPERTY_TABLE_MAP = {
"maximum_capacity_mw": dict(
table=[f"maximum_capacity_{gen_type}" for gen_type in _ECAA_BATTERY_TYPES],
Expand Down
Loading
Loading