Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions src/isp_trace_parser/demand_trace_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from pathlib import Path

from isp_trace_parser import mappings


def build(
    files: list[Path],
    version: str,
) -> dict[Path, dict[str, str | int]]:
    """Build metadata for demand files by lookup in the demand mapping.

    The demand YAML is option-keyed, so `_expand_lookup` first expands the
    dimensions into a `(location_prefix, dimensions_suffix)`-keyed dict;
    each filename then decomposes into those two literal slices (either
    side of `_RefYear_<refyear>_`) for a single lookup.

    Args:
        files: Paths of the demand trace files; only each file's name is read.
        version: Mapping version passed through to `_expand_lookup`.

    Returns:
        A dict keyed by the input paths. Each value is the lookup entry for
        that file plus an integer `reference_year`.

    Raises:
        ValueError: If a filename has no decimal reference year after
            `_RefYear_`, or its prefix/suffix pair is not in the lookup.
    """
    lookup = _expand_lookup(version)

    file_metadata: dict[Path, dict[str, str | int]] = {}
    for path in files:
        # A stem without `_RefYear_` leaves `after` empty, so `refyear` is ""
        # and fails the year check below.
        location_prefix, _, after = path.stem.partition("_RefYear_")
        refyear, _, dimensions_suffix = after.partition("_")
        key = (location_prefix, dimensions_suffix)
        # `isdecimal` rather than `isdigit`: `isdigit` also accepts characters
        # such as a superscript two that `int` rejects, which would surface as
        # a differently worded ValueError from the `int` call.
        if not refyear.isdecimal() or key not in lookup:
            raise ValueError(f"Unexpected trace filename: {path.name}")
        file_metadata[path] = {**lookup[key], "reference_year": int(refyear)}
    return file_metadata
Comment on lines +20 to +27

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually found the code in the for loop pretty hard to understand. This is total overkill, but I was curious about how it might be made clearer, so here's what Claude and I came up with. Please just treat this as a comment for you to take or leave as you please.

Suggested change
for path in files:
subregion, sep, after = path.stem.partition("_RefYear_")
if not sep:
raise ValueError(f"Unexpected trace filename: {path.name}")
year_str, _, rest = after.partition("_")
key = f"{subregion}_{rest}"
if not year_str.isdigit() or not rest or key not in lookup:
raise ValueError(f"Unexpected trace filename: {path.name}")
file_metadata[path] = {**lookup[key], "reference_year": int(year_str)}
return file_metadata
for path in files:
reference_year, dimension_key = _parse_filename(path)
if dimension_key not in lookup:
raise ValueError(f"Unexpected trace filename: {path.name}")
file_metadata[path] = {
**lookup[dimension_key],
"reference_year": reference_year,
}
return file_metadata
def _parse_filename(path: Path) -> tuple[int, str]:
    """Split a demand filename into its reference year and dimension key.

    `<subregion>_RefYear_<year>_<rest>` -> `(year, "<subregion>_<rest>")`: the
    reference year is pulled out and the surviving dimension fields are
    rejoined with a single underscore.

    Args:
        path: Path to a demand trace file; only its name is inspected.

    Returns:
        The reference year as an int, and the rejoined dimension key.

    Raises:
        ValueError: If `_RefYear_` is missing, nothing follows the year, or
            the year is not a decimal number.
    """
    name = path.stem  # filename minus its final suffix
    subregion, stamp, after = name.partition("_RefYear_")
    year, _, rest = after.partition("_")
    # `isdecimal` rather than `isdigit`: `isdigit` also accepts characters such
    # as a superscript two that `int` rejects, which would surface as a
    # differently worded ValueError from the `int` call below.
    if not stamp or not rest or not year.isdecimal():
        raise ValueError(f"Unexpected trace filename: {path.name}")
    return int(year), f"{subregion}_{rest}"

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also might be clearer. Anyway, I'll stop now.

def _parse_filename(path: Path) -> tuple[int, str]:
    """Extract the reference year and dimension key from a demand filename.

    The file stem must have the form
    `<subregion>_RefYear_<year>_<remaining_dimensions>` with a four-digit
    year. The year is removed and the two remaining pieces are rejoined as
    `"<subregion>_<remaining_dimensions>"`.

    Args:
        path: Path to a demand trace file; only its name is inspected.

    Returns:
        The reference year as an int, and the rejoined dimension key.

    Raises:
        ValueError: If the stem does not match the expected form.
    """
    parsed = re.fullmatch(r"(.+)_RefYear_(\d{4})_(.+)", path.stem)
    if parsed is None:
        raise ValueError(f"Unexpected trace filename: {path.name}")
    prefix = parsed.group(1)
    reference_year = int(parsed.group(2))
    suffix = parsed.group(3)
    return reference_year, f"{prefix}_{suffix}"

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Nick - yeah think you are right .. will make some changes (..probably what you've suggested)



def _expand_lookup(version: str) -> dict[tuple[str, str], dict[str, str]]:
    """Enumerate every valid demand filename key, ignoring the reference year.

    Each key is a `(location_prefix, dimensions_suffix)` pair: the literal
    text either side of `_RefYear_<refyear>_` in a filename. The prefix is the
    subregion and the suffix is `<scenario>_<poe>_<demand_type>`. The
    `reference_year` field is not part of the entries here; `build` adds it.

    Args:
        version: Mapping version handed to `mappings.load`.

    Returns:
        A dict from each key to its `subregion`, `scenario`, `poe` and
        `demand_type` values.
    """
    demand_map = mappings.load("demand", version=version)
    topography_map = mappings.load("topography", version=version)

    # One entry per combination, generated in the same order as nested loops
    # over subregion -> scenario -> poe -> demand_type.
    return {
        (region, f"{scenario_code}_{poe_level}_{trace_type}"): {
            "subregion": region,
            "scenario": scenario_code,
            "poe": poe_level,
            "demand_type": trace_type,
        }
        for region in topography_map["subregions"]
        for scenario_code in demand_map["scenarios"]
        for poe_level in demand_map["poe_levels"]
        for trace_type in demand_map["demand_types"]
    }
27 changes: 7 additions & 20 deletions src/isp_trace_parser/demand_traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from joblib import Parallel, delayed
from pydantic import BaseModel, validate_call

from isp_trace_parser import input_validation, mappings
from isp_trace_parser.metadata_extractors import extract_demand_trace_metadata
from isp_trace_parser import demand_trace_metadata, input_validation, mappings
from isp_trace_parser.trace_restructure_helper_functions import (
check_filter_by_metadata,
get_all_filepaths,
Expand Down Expand Up @@ -133,11 +132,13 @@ def parse_demand_traces(
parsed_directory = input_validation.parsed_directory(parsed_directory)

files = get_all_filepaths(input_directory)
file_metadata = demand_trace_metadata.build(files, version="2024")

demand_scenario_mapping = mappings.load("demand_scenario_mapping")
demand_scenario_mapping = mappings.load("demand", version="2024")["scenarios"]

partial_func = functools.partial(
restructure_demand_file,
all_input_file_metadata=file_metadata,
demand_scenario_mapping=demand_scenario_mapping,
output_directory=parsed_directory,
filters=filters,
Expand All @@ -155,6 +156,7 @@ def parse_demand_traces(

def restructure_demand_file(
input_filepath: Path,
all_input_file_metadata: dict[Path, dict[str, str | int]],
demand_scenario_mapping: dict[str, str],
output_directory: Path,
filters: DemandMetadataFilter | None = None,
Expand All @@ -168,6 +170,7 @@ def restructure_demand_file(

Args:
input_filepath: Path object representing the input demand trace file.
all_input_file_metadata: Metadata for all input files.
demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names.
output_directory: Directory where restructured files will be saved.
filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata.
Expand All @@ -188,7 +191,7 @@ def restructure_demand_file(

# This will process the input file and save it in parquet format in the specified output directory
"""
file_metadata = extract_demand_trace_metadata(input_filepath.name)
file_metadata = dict(all_input_file_metadata[input_filepath])

file_metadata["scenario"] = get_save_scenario_for_demand_trace(
file_metadata, demand_scenario_mapping
Expand Down Expand Up @@ -255,19 +258,3 @@ def write_new_demand_filename(metadata: dict[str, str]) -> str:
scenario = m["scenario"].replace(" ", "_")

return f"{scenario}_RefYear{m['reference_year']}_{subregion}_{m['poe']}_{m['demand_type']}.parquet"


def extract_metadata_for_all_demand_files(
    filenames: list[Path],
) -> dict[Path, dict[str, str]]:
    """
    Extracts metadata for all demand trace files.

    Args:
        filenames: List of Path objects representing the demand trace files.

    Returns:
        A dictionary with filepaths as keys and metadata dicts as values.
    """
    # Single pass: each path is paired with its metadata as it is produced, so
    # `filenames` is only iterated once (a one-shot iterable would otherwise be
    # exhausted before the keys were read, giving an empty dict).
    return {f: extract_demand_trace_metadata(f.name) for f in filenames}
21 changes: 21 additions & 0 deletions src/isp_trace_parser/mappings/2024/demand.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# 2024 ISP demand trace metadata
# (based on the 2024 traces and IASR workbook).
#
# 2024 AEMO demand trace filenames take the form:
# <subregion>_RefYear_<year>_<scenario>_<poe>_<demand_type>.csv
# e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv
#
# Valid filenames are the product of the dimensions below, together with
# subregion (sourced from topography.yaml) and reference_year.

# scenarios: raw AEMO filename code -> IASR workbook display name.
# Keys appear in the filename; values are used in output filenames and
# in the `scenario` column of the parsed parquet.
scenarios:
STEP_CHANGE: Step Change
PROGRESSIVE_CHANGE: Progressive Change
HYDROGEN_EXPORT: Green Energy Exports

poe_levels: [POE10, POE50]

demand_types: [OPSO_MODELLING, OPSO_MODELLING_PVLITE, PV_TOT]

This file was deleted.

21 changes: 0 additions & 21 deletions src/isp_trace_parser/metadata_extractors.py

This file was deleted.

45 changes: 45 additions & 0 deletions tests/test_demand_trace_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pathlib import Path

import pytest

from isp_trace_parser import demand_trace_metadata


def test_build():
    """Check `build` against two filenames that differ in every dimension.

    The pair varies subregion, reference year, scenario, poe and demand type.
    Every combination resolves through the same single dict lookup, so two
    examples are enough.
    """
    vic_file = Path("VIC_RefYear_2011_STEP_CHANGE_POE10_OPSO_MODELLING.csv")
    cnsw_file = Path(
        "CNSW_RefYear_2023_HYDROGEN_EXPORT_POE50_OPSO_MODELLING_PVLITE.csv"
    )

    result = demand_trace_metadata.build([vic_file, cnsw_file], version="2024")

    expected_vic = {
        "subregion": "VIC",
        "reference_year": 2011,
        "scenario": "STEP_CHANGE",
        "poe": "POE10",
        "demand_type": "OPSO_MODELLING",
    }
    expected_cnsw = {
        "subregion": "CNSW",
        "reference_year": 2023,
        "scenario": "HYDROGEN_EXPORT",
        "poe": "POE50",
        "demand_type": "OPSO_MODELLING_PVLITE",
    }
    assert result[vic_file] == expected_vic
    assert result[cnsw_file] == expected_cnsw


@pytest.mark.parametrize(
    "filename",
    [
        # missing _RefYear_
        "VIC_2011_STEP_CHANGE_POE10_OPSO_MODELLING.csv",
        # non-digit year
        "VIC_RefYear_201a_STEP_CHANGE_POE10_OPSO_MODELLING.csv",
        # lookup miss
        "VIC_RefYear_2011_MYSTERY_POE10_OPSO_MODELLING.csv",
    ],
)
def test_build_rejects_unexpected_filename(filename):
    """Each malformed or unmapped filename must make `build` raise ValueError."""
    bad_file = Path(filename)
    with pytest.raises(ValueError, match="Unexpected trace filename"):
        demand_trace_metadata.build([bad_file], version="2024")
11 changes: 0 additions & 11 deletions tests/test_trace_file_meta_data_extraction.py

This file was deleted.

Loading