From e99b3963fd3605c9a97407092fcf654059a038f5 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 17 Jun 2026 15:44:45 +1000 Subject: [PATCH 01/12] Add new mapping file for demand data --- src/isp_trace_parser/mappings/2024/demand.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/isp_trace_parser/mappings/2024/demand.yaml diff --git a/src/isp_trace_parser/mappings/2024/demand.yaml b/src/isp_trace_parser/mappings/2024/demand.yaml new file mode 100644 index 0000000..545064e --- /dev/null +++ b/src/isp_trace_parser/mappings/2024/demand.yaml @@ -0,0 +1,15 @@ +# 2024 ISP demand trace metadata +#(based on the 2024 traces and IASR workbook). +# +# 2024 AEMO demand trace filenames take the form: +# _RefYear____.csv +# e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv +# +# Valid filename are the product of these following options ( +# with subregion, sourced from topography.yaml) and reference_year. + +scenarios: [STEP_CHANGE, PROGRESSIVE_CHANGE, HYDROGEN_EXPORT] + +poe_levels: [POE10, POE50] + +demand_types: [OPSO_MODELLING, OPSO_MODELLING_PVLITE, PV_TOT] From d03195beb4b855bc1438b469e1528ccc57e3bdf7 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 17 Jun 2026 15:50:25 +1000 Subject: [PATCH 02/12] Update demand yaml to correctly map IASR names --- src/isp_trace_parser/mappings/2024/demand.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/isp_trace_parser/mappings/2024/demand.yaml b/src/isp_trace_parser/mappings/2024/demand.yaml index 545064e..54bcf0a 100644 --- a/src/isp_trace_parser/mappings/2024/demand.yaml +++ b/src/isp_trace_parser/mappings/2024/demand.yaml @@ -8,7 +8,13 @@ # Valid filename are the product of these following options ( # with subregion, sourced from topography.yaml) and reference_year. -scenarios: [STEP_CHANGE, PROGRESSIVE_CHANGE, HYDROGEN_EXPORT] +# scenarios: raw AEMO filename code -> IASR workbook display name. 
+# Keys appear in the filename; values are used in output filenames and +# in the `scenario` column of the parsed parquet. +scenarios: + STEP_CHANGE: Step Change + PROGRESSIVE_CHANGE: Progressive Change + HYDROGEN_EXPORT: Green Energy Exports poe_levels: [POE10, POE50] From 0b1e4309215a3169812a5d1c8dd70ddbc61439e0 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 17 Jun 2026 15:51:07 +1000 Subject: [PATCH 03/12] Deleted old scenario mapping --- .../mappings/2024/demand_scenario_mapping.yaml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 src/isp_trace_parser/mappings/2024/demand_scenario_mapping.yaml diff --git a/src/isp_trace_parser/mappings/2024/demand_scenario_mapping.yaml b/src/isp_trace_parser/mappings/2024/demand_scenario_mapping.yaml deleted file mode 100644 index de7d1a7..0000000 --- a/src/isp_trace_parser/mappings/2024/demand_scenario_mapping.yaml +++ /dev/null @@ -1,3 +0,0 @@ -HYDROGEN_EXPORT: Green Energy Exports -STEP_CHANGE: Step Change -PROGRESSIVE_CHANGE: Progressive Change From f9d40134f56bb1ebba699a21db27a5a1ee3eebd4 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 17 Jun 2026 15:52:26 +1000 Subject: [PATCH 04/12] Completely removed old regex extractors --- src/isp_trace_parser/metadata_extractors.py | 21 ------------------- tests/test_trace_file_meta_data_extraction.py | 11 ---------- 2 files changed, 32 deletions(-) delete mode 100644 src/isp_trace_parser/metadata_extractors.py delete mode 100644 tests/test_trace_file_meta_data_extraction.py diff --git a/src/isp_trace_parser/metadata_extractors.py b/src/isp_trace_parser/metadata_extractors.py deleted file mode 100644 index a99b450..0000000 --- a/src/isp_trace_parser/metadata_extractors.py +++ /dev/null @@ -1,21 +0,0 @@ -import re - - -def extract_demand_trace_metadata(filename): - # Regex pattern to match the structure of the filename - pattern = re.compile( - r"^(?P[A-Z]+)_RefYear_(?P\d{4})_(?P[A-Z_]+)_(?PPOE\d{2})_(?P[" - r"A-Z_]+)\.csv$" - ) - - # Match the pattern 
against the filename - match = pattern.match(filename) - - if match: - # If the filename matches the pattern, return a dictionary of captured groups - match_data = match.groupdict() - match_data["reference_year"] = int(match_data["reference_year"]) - return match_data - else: - # If the pattern does not match, raise an error or return None - raise ValueError(f"Filename '{filename}' does not match the expected pattern") diff --git a/tests/test_trace_file_meta_data_extraction.py b/tests/test_trace_file_meta_data_extraction.py deleted file mode 100644 index 49346cf..0000000 --- a/tests/test_trace_file_meta_data_extraction.py +++ /dev/null @@ -1,11 +0,0 @@ -from isp_trace_parser import metadata_extractors - - -def test_demand_trace_metadata_extraction(): - file_name = "VIC_RefYear_2011_STEP_CHANGE_POE10_OPSO_MODELLING.csv" - metadata = metadata_extractors.extract_demand_trace_metadata(file_name) - assert metadata["subregion"] == "VIC" - assert metadata["reference_year"] == 2011 - assert metadata["scenario"] == "STEP_CHANGE" - assert metadata["poe"] == "POE10" - assert metadata["demand_type"] == "OPSO_MODELLING" From 7bb55b354fd01d994847b5678dd509dbc84d2a79 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 17 Jun 2026 21:33:42 +1000 Subject: [PATCH 05/12] Added tests for demand_trace_metadata --- tests/test_demand_trace_metadata.py | 45 +++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/test_demand_trace_metadata.py diff --git a/tests/test_demand_trace_metadata.py b/tests/test_demand_trace_metadata.py new file mode 100644 index 0000000..93a3499 --- /dev/null +++ b/tests/test_demand_trace_metadata.py @@ -0,0 +1,45 @@ +from pathlib import Path + +import pytest + +from isp_trace_parser import demand_trace_metadata + + +def test_build(): + """Two examples spanning different scenario / poe / demand_type / + subregion values. Every combination resolves through the same single + dict lookup, so two are enough for testing. 
+ """ + files = [ + Path("VIC_RefYear_2011_STEP_CHANGE_POE10_OPSO_MODELLING.csv"), + Path("CNSW_RefYear_2023_HYDROGEN_EXPORT_POE50_OPSO_MODELLING_PVLITE.csv"), + ] + metadata = demand_trace_metadata.build(files, version="2024") + + assert metadata[files[0]] == { + "subregion": "VIC", + "reference_year": 2011, + "scenario": "STEP_CHANGE", + "poe": "POE10", + "demand_type": "OPSO_MODELLING", + } + assert metadata[files[1]] == { + "subregion": "CNSW", + "reference_year": 2023, + "scenario": "HYDROGEN_EXPORT", + "poe": "POE50", + "demand_type": "OPSO_MODELLING_PVLITE", + } + + +@pytest.mark.parametrize( + "filename", + [ + "VIC_2011_STEP_CHANGE_POE10_OPSO_MODELLING.csv", # missing _RefYear_ + "VIC_RefYear_201a_STEP_CHANGE_POE10_OPSO_MODELLING.csv", # non-digit year + "VIC_RefYear_2011_MYSTERY_POE10_OPSO_MODELLING.csv", # lookup miss + ], +) +def test_build_rejects_unexpected_filename(filename): + with pytest.raises(ValueError, match="Unexpected trace filename"): + demand_trace_metadata.build([Path(filename)], version="2024") From 1ab91924bcb31d1059751ae864a170a68ccf4c96 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Thu, 18 Jun 2026 09:55:04 +1000 Subject: [PATCH 06/12] Remove old regex extractor method from demand_traces (and use metadata dict) (Note: metadata is no longer derived from the filename inside the function (via regex); it is looked up from a stem-keyed dict built once by demand_trace_metadata.build(), so that dict needs to be passed in to the function). 
Mirrors resource trace approach --- src/isp_trace_parser/demand_traces.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 97cf2d1..f427bff 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -7,8 +7,7 @@ from joblib import Parallel, delayed from pydantic import BaseModel, validate_call -from isp_trace_parser import input_validation, mappings -from isp_trace_parser.metadata_extractors import extract_demand_trace_metadata +from isp_trace_parser import demand_trace_metadata, input_validation, mappings from isp_trace_parser.trace_restructure_helper_functions import ( check_filter_by_metadata, get_all_filepaths, @@ -133,11 +132,13 @@ def parse_demand_traces( parsed_directory = input_validation.parsed_directory(parsed_directory) files = get_all_filepaths(input_directory) + file_metadata = demand_trace_metadata.build(files, version="2024") - demand_scenario_mapping = mappings.load("demand_scenario_mapping") + demand_scenario_mapping = mappings.load("demand", version="2024")["scenarios"] partial_func = functools.partial( restructure_demand_file, + all_input_file_metadata=file_metadata, demand_scenario_mapping=demand_scenario_mapping, output_directory=parsed_directory, filters=filters, @@ -155,6 +156,7 @@ def parse_demand_traces( def restructure_demand_file( input_filepath: Path, + all_input_file_metadata: dict[Path, dict[str, str | int]], demand_scenario_mapping: dict[str, str], output_directory: Path, filters: DemandMetadataFilter | None = None, @@ -188,7 +190,7 @@ def restructure_demand_file( # This will process the input file and save it in parquet format in the specified output directory """ - file_metadata = extract_demand_trace_metadata(input_filepath.name) + file_metadata = dict(all_input_file_metadata[input_filepath]) file_metadata["scenario"] = get_save_scenario_for_demand_trace( 
file_metadata, demand_scenario_mapping @@ -255,19 +257,3 @@ def write_new_demand_filename(metadata: dict[str, str]) -> str: scenario = m["scenario"].replace(" ", "_") return f"{scenario}_RefYear{m['reference_year']}_{subregion}_{m['poe']}_{m['demand_type']}.parquet" - - -def extract_metadata_for_all_demand_files( - filenames: list[Path], -) -> dict[Path, dict[str, str]]: - """ - Extracts metadata for all demand trace files. - - Args: - filenames: List of Path objects representing the demand trace files. - - Returns: - A dictionary with filepaths as keys and metadata dicts as values. - """ - file_metadata = [extract_demand_trace_metadata(str(f.name)) for f in filenames] - return dict(zip(filenames, file_metadata)) From cddc83e75b0fe1937db31a7f719790d78789c3d1 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Thu, 18 Jun 2026 10:02:44 +1000 Subject: [PATCH 07/12] Updated docstring for restructure_demand_file --- src/isp_trace_parser/demand_traces.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index f427bff..985ba08 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -170,6 +170,8 @@ def restructure_demand_file( Args: input_filepath: Path object representing the input demand trace file. + all_input_file_metadata: Pre-built dict mapping every input filepath to its metadata (subregion, + reference_year, scenario, poe, demand_type). demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. output_directory: Directory where restructured files will be saved. filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata. 
From e6b4fca08645f77d861bd7552ed518d8f6aa582f Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Thu, 18 Jun 2026 10:12:51 +1000 Subject: [PATCH 08/12] Minor edit to doc str --- src/isp_trace_parser/demand_traces.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 985ba08..9703fe6 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -170,8 +170,7 @@ def restructure_demand_file( Args: input_filepath: Path object representing the input demand trace file. - all_input_file_metadata: Pre-built dict mapping every input filepath to its metadata (subregion, - reference_year, scenario, poe, demand_type). + all_input_file_metadata: Metadata for all input files. demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. output_directory: Directory where restructured files will be saved. filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata. From 5e7e2808b9142608d727271cc09ae1bffca93955 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Thu, 18 Jun 2026 10:13:52 +1000 Subject: [PATCH 09/12] Minor change to doc string --- src/isp_trace_parser/mappings/2024/demand.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/isp_trace_parser/mappings/2024/demand.yaml b/src/isp_trace_parser/mappings/2024/demand.yaml index 54bcf0a..57801bd 100644 --- a/src/isp_trace_parser/mappings/2024/demand.yaml +++ b/src/isp_trace_parser/mappings/2024/demand.yaml @@ -5,8 +5,8 @@ # _RefYear____.csv # e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv # -# Valid filename are the product of these following options ( -# with subregion, sourced from topography.yaml) and reference_year. +# Valid filenames are the product of these dimensions (with subregion, +# sourced from topography.yaml, and reference_year). 
# scenarios: raw AEMO filename code -> IASR workbook display name. # Keys appear in the filename; values are used in output filenames and From cec23b88af541922208afec8c429f612d330f705 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Thu, 18 Jun 2026 10:14:47 +1000 Subject: [PATCH 10/12] Added demand_trace_metadata.py - similar to resource_trace_metadata.py --- src/isp_trace_parser/demand_trace_metadata.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/isp_trace_parser/demand_trace_metadata.py diff --git a/src/isp_trace_parser/demand_trace_metadata.py b/src/isp_trace_parser/demand_trace_metadata.py new file mode 100644 index 0000000..1c49f9b --- /dev/null +++ b/src/isp_trace_parser/demand_trace_metadata.py @@ -0,0 +1,52 @@ +from pathlib import Path + +from isp_trace_parser import mappings + + +def build( + files: list[Path], + version: str, +) -> dict[Path, dict[str, str | int]]: + """Build metadata for demand files by lookup in the demand mapping. + + The demand YAML is option-keyed, so `_expand_lookup` first expands the + dimensions into a stem-keyed dict; each filename then decomposes + into (subregion, year, scenario_poe_demand_type) for a single lookup. + """ + lookup = _expand_lookup(version) + + file_metadata: dict[Path, dict[str, str | int]] = {} + for path in files: + subregion, sep, after = path.stem.partition("_RefYear_") + if not sep: + raise ValueError(f"Unexpected trace filename: {path.name}") + year_str, _, rest = after.partition("_") + key = f"{subregion}_{rest}" + if not year_str.isdigit() or not rest or key not in lookup: + raise ValueError(f"Unexpected trace filename: {path.name}") + file_metadata[path] = {**lookup[key], "reference_year": int(year_str)} + return file_metadata + + +def _expand_lookup(version: str) -> dict[str, dict[str, str]]: + """Expand the demand dimensions into a year-agnostic stem-keyed dict. + + Keyed by `___` (i.e. the stem + with `_RefYear__` removed). 
`reference_year` is added by `build`. + """ + demand = mappings.load("demand", version=version) + topography = mappings.load("topography", version=version) + + lookup: dict[str, dict[str, str]] = {} + for subregion in topography["subregions"]: + for scenario in demand["scenarios"].keys(): + for poe in demand["poe_levels"]: + for demand_type in demand["demand_types"]: + key = f"{subregion}_{scenario}_{poe}_{demand_type}" + lookup[key] = { + "subregion": subregion, + "scenario": scenario, + "poe": poe, + "demand_type": demand_type, + } + return lookup From efbdbb23e92ecf9d52db28160da6c808b9fce6d5 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Sun, 21 Jun 2026 11:07:12 +1000 Subject: [PATCH 11/12] Simplify and tighten demand filename parsing Address review feedback from Nick (#56) - Simplify the parse loop: drop redundant `if not sep` check - Rename for clarity - Remove synthetic rejoin --- src/isp_trace_parser/demand_trace_metadata.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/isp_trace_parser/demand_trace_metadata.py b/src/isp_trace_parser/demand_trace_metadata.py index 1c49f9b..3e1ff79 100644 --- a/src/isp_trace_parser/demand_trace_metadata.py +++ b/src/isp_trace_parser/demand_trace_metadata.py @@ -10,39 +10,40 @@ def build( """Build metadata for demand files by lookup in the demand mapping. The demand YAML is option-keyed, so `_expand_lookup` first expands the - dimensions into a stem-keyed dict; each filename then decomposes - into (subregion, year, scenario_poe_demand_type) for a single lookup. + dimensions into a `(location_prefix, dimensions_suffix)`-keyed dict; + each filename then decomposes into those two literal slices (either + side of `_RefYear__`) for a single lookup. 
""" lookup = _expand_lookup(version) file_metadata: dict[Path, dict[str, str | int]] = {} for path in files: - subregion, sep, after = path.stem.partition("_RefYear_") - if not sep: + location_prefix, _, after = path.stem.partition("_RefYear_") + year, _, dimensions_suffix = after.partition("_") + key = (location_prefix, dimensions_suffix) + if not year.isdigit() or key not in lookup: raise ValueError(f"Unexpected trace filename: {path.name}") - year_str, _, rest = after.partition("_") - key = f"{subregion}_{rest}" - if not year_str.isdigit() or not rest or key not in lookup: - raise ValueError(f"Unexpected trace filename: {path.name}") - file_metadata[path] = {**lookup[key], "reference_year": int(year_str)} + file_metadata[path] = {**lookup[key], "reference_year": int(year)} return file_metadata -def _expand_lookup(version: str) -> dict[str, dict[str, str]]: - """Expand the demand dimensions into a year-agnostic stem-keyed dict. +def _expand_lookup(version: str) -> dict[tuple[str, str], dict[str, str]]: + """Expand the demand dimensions into a year-agnostic lookup. - Keyed by `___` (i.e. the stem - with `_RefYear__` removed). `reference_year` is added by `build`. + Keyed by `(location_prefix, dimensions_suffix)` — the two literal + slices of the filename either side of `_RefYear__`. For 2024, + `location_prefix` is the subregion and `dimensions_suffix` is + `__`. `reference_year` is added by `build`. 
""" demand = mappings.load("demand", version=version) topography = mappings.load("topography", version=version) - lookup: dict[str, dict[str, str]] = {} + lookup: dict[tuple[str, str], dict[str, str]] = {} for subregion in topography["subregions"]: - for scenario in demand["scenarios"].keys(): + for scenario in demand["scenarios"]: for poe in demand["poe_levels"]: for demand_type in demand["demand_types"]: - key = f"{subregion}_{scenario}_{poe}_{demand_type}" + key = (subregion, f"{scenario}_{poe}_{demand_type}") lookup[key] = { "subregion": subregion, "scenario": scenario, From 126bb9eb0ba0f2d5caeb1cc0ecf5ede9dcda9c31 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Sun, 21 Jun 2026 19:29:49 +1000 Subject: [PATCH 12/12] clarified name (renamed year--> refyear) --- src/isp_trace_parser/demand_trace_metadata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/isp_trace_parser/demand_trace_metadata.py b/src/isp_trace_parser/demand_trace_metadata.py index 3e1ff79..c0124ef 100644 --- a/src/isp_trace_parser/demand_trace_metadata.py +++ b/src/isp_trace_parser/demand_trace_metadata.py @@ -12,26 +12,26 @@ def build( The demand YAML is option-keyed, so `_expand_lookup` first expands the dimensions into a `(location_prefix, dimensions_suffix)`-keyed dict; each filename then decomposes into those two literal slices (either - side of `_RefYear__`) for a single lookup. + side of `_RefYear__`) for a single lookup. 
""" lookup = _expand_lookup(version) file_metadata: dict[Path, dict[str, str | int]] = {} for path in files: location_prefix, _, after = path.stem.partition("_RefYear_") - year, _, dimensions_suffix = after.partition("_") + refyear, _, dimensions_suffix = after.partition("_") key = (location_prefix, dimensions_suffix) - if not year.isdigit() or key not in lookup: + if not refyear.isdigit() or key not in lookup: raise ValueError(f"Unexpected trace filename: {path.name}") - file_metadata[path] = {**lookup[key], "reference_year": int(year)} + file_metadata[path] = {**lookup[key], "reference_year": int(refyear)} return file_metadata def _expand_lookup(version: str) -> dict[tuple[str, str], dict[str, str]]: - """Expand the demand dimensions into a year-agnostic lookup. + """Expand the demand dimensions into a refyear-agnostic lookup. Keyed by `(location_prefix, dimensions_suffix)` — the two literal - slices of the filename either side of `_RefYear__`. For 2024, + slices of the filename either side of `_RefYear__`. For 2024, `location_prefix` is the subregion and `dimensions_suffix` is `__`. `reference_year` is added by `build`. """