Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions create_trace_mapping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import yaml
from nemosis import static_table

from generator_to_trace_draft_mapper import (
draft_solar_generator_to_trace_mapping,
draft_solar_rez_mapping,
Expand All @@ -9,6 +7,7 @@
get_all_generators,
gets_rezs,
)
from nemosis import static_table

workbook = "D:/isp_2024_data/2024-isp-inputs-and-assumptions-workbook.xlsx"
all_generators = get_all_generators(workbook)
Expand Down
144 changes: 0 additions & 144 deletions generator_to_trace_draft_mapper.py

This file was deleted.

59 changes: 0 additions & 59 deletions src/isp_trace_parser/metadata_extractors.py

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generator_to_trace_draft_mapper.py imported extract_solar_trace_metadata and extract_wind_trace_metadata. So maybe we should just delete generator_to_trace_draft_mapper.py along with these functions?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah good point - deleted.

Original file line number Diff line number Diff line change
@@ -1,65 +1,6 @@
import re


def extract_solar_trace_metadata(filename):
# Case 1: Match filenames that have a name, a tech, followed by RefYear
pattern1 = re.compile(
r"^(?P<name>[A-Za-z0-9_\-]+)_(?P<resource_type>[A-Z]+)_RefYear(?P<reference_year>\d{4})\.csv$"
)

# Case 2: Match filenames that have a rez, a name, and tech, followed by RefYear
pattern2 = re.compile(
r"^[A-Z]+_(?P<name>[A-Z0-9]+)_[A-Za-z0-9_\-]+_(?P<resource_type>[A-Z]+)_RefYear(?P<reference_year>\d{4})\.csv$"
)

# Try to match with pattern 2 first
match2 = pattern2.match(filename)
if match2:
match_data = match2.groupdict()
match_data["file_type"] = "zone"
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

# Otherwise, try to match with pattern 1 (just name and year)
match1 = pattern1.match(filename)
if match1:
match_data = match1.groupdict()
match_data["file_type"] = "project"
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

raise ValueError(f"Filename '{filename}' does not match the expected pattern")


def extract_wind_trace_metadata(filename):
# Case 1: Match filenames that have a simple name followed by RefYear
pattern1 = re.compile(r"^(?P<name>.*)_RefYear(?P<reference_year>\d{4})\.csv$")

# Case 2: Match filenames that have a resource type and a name followed by RefYear
pattern2 = re.compile(
r"^(?P<name>[A-Z0-9]+)_(?P<resource_type>W[A-Z]+)_[A-Za-z_\-]+_RefYear(?P<reference_year>\d{4})\.csv$"
)

# Try to match with pattern 2 first
match2 = pattern2.match(filename)
if match2:
match_data = match2.groupdict()
match_data["file_type"] = "zone"
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

# Otherwise, try to match with pattern 1 (just name and year)
match1 = pattern1.match(filename)
if match1:
match_data = match1.groupdict()
match_data["file_type"] = "project"
match_data["resource_type"] = "WIND"
match_data["reference_year"] = int(match_data["reference_year"])
return match_data

raise ValueError(f"Filename '{filename}' does not match the expected pattern")


def extract_demand_trace_metadata(filename):
# Regex pattern to match the structure of the filename
pattern = re.compile(
Expand Down
44 changes: 44 additions & 0 deletions src/isp_trace_parser/resource_trace_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from pathlib import Path

from isp_trace_parser import mappings

# Temporary mapping that translates the YAML's resource_type vocabulary to the legacy
# short codes still used downstream for now (filters, parquet columns, output filenames).
# Keys are the resource_type values read from the resource mapping entries; values are
# the short codes written into the "resource_type" field of the built metadata.

_RESOURCE_TYPE_CODES: dict[str, str] = {
"solar_sat": "SAT",
"solar_ffp": "FFP",
"solar_cst": "CST",
"wind": "WIND",
"wind_high": "WH",
"wind_medium": "WM",
"wind_offshore_fixed": "WFX",
"wind_offshore_floating": "WFL",
}


def build(
    files: list[Path],
    version: str,
) -> dict[Path, dict[str, str | int]]:
    """Build metadata for resource files by lookup in the resource mapping.

    The mapping key is the trace stem (the filename with `_RefYear<year>.csv`
    stripped) so `<stem>_RefYear<year>.csv` decomposes back to (stem, year).

    Args:
        files: Paths of the resource trace files to describe.
        version: Version passed to ``mappings.load`` to select the resource
            mapping.

    Returns:
        A dict keyed by the input paths. Each value holds ``name`` (the
        mapping entry's ``location``), ``reference_year`` (``int``),
        ``resource_type`` (legacy short code) and ``file_type`` (the mapping
        entry's ``location_type``).

    Raises:
        ValueError: If a filename has no ``_RefYear`` separator, has a
            non-numeric year, or has a stem that is missing from the resource
            mapping.
    """

    resource_mapping = mappings.load("resources", version=version)

    file_metadata: dict[Path, dict[str, str | int]] = {}
    for path in files:
        stem, sep, ref = path.stem.rpartition("_RefYear")

        # Gaps in the mapping should surface when the parser is updated, but
        # filenames can also be altered locally, so fail with a clear message
        # rather than a bare KeyError or int() error.
        # isascii() guards against characters such as "²" that pass isdigit()
        # but are rejected by int().
        year_is_numeric = ref.isascii() and ref.isdigit()
        if not sep or not year_is_numeric or stem not in resource_mapping:
            raise ValueError(f"Unexpected trace filename: {path.name}")

        entry = resource_mapping[stem]
        file_metadata[path] = {
            "name": entry["location"],
            "reference_year": int(ref),
            "resource_type": _RESOURCE_TYPE_CODES[entry["resource_type"]],
            "file_type": entry["location_type"],
        }
    return file_metadata
21 changes: 2 additions & 19 deletions src/isp_trace_parser/solar_traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from joblib import Parallel, delayed
from pydantic import BaseModel, validate_call

from isp_trace_parser import input_validation, mappings
from isp_trace_parser.metadata_extractors import extract_solar_trace_metadata
from isp_trace_parser import input_validation, mappings, resource_trace_metadata
from isp_trace_parser.trace_restructure_helper_functions import (
check_filter_by_metadata,
get_all_filepaths,
Expand Down Expand Up @@ -134,7 +133,7 @@ def parse_solar_traces(
parsed_directory = input_validation.parsed_directory(parsed_directory)

files = get_all_filepaths(input_directory)
file_metadata = extract_metadata_for_all_solar_files(files)
file_metadata = resource_trace_metadata.build(files, version="2024")
resource_mapping = mappings.load("resources")

project_name_mapping = {
Expand Down Expand Up @@ -264,22 +263,6 @@ def write_output_solar_filename(metadata: dict[str, str]) -> str:
return f"RefYear{m['reference_year']}_{name}_{m['resource_type']}.parquet"


def extract_metadata_for_all_solar_files(
    filepaths: list[Path],
) -> dict[Path, dict[str, str]]:
    """
    Map every solar trace file to the metadata parsed from its filename.

    Args:
        filepaths: List of Path objects representing the solar trace files.

    Returns:
        A dictionary keyed by filepath whose values are the metadata dicts
        produced by extract_solar_trace_metadata for that file's name.
    """
    return {
        trace_path: extract_solar_trace_metadata(str(trace_path.name))
        for trace_path in filepaths
    }


def get_unique_resource_types_in_metadata(
metadata_for_trace_files: dict[Path, dict[str, str]],
) -> list[str]:
Expand Down
15 changes: 2 additions & 13 deletions src/isp_trace_parser/wind_traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from joblib import Parallel, delayed
from pydantic import BaseModel, validate_call

from isp_trace_parser import input_validation, mappings
from isp_trace_parser.metadata_extractors import extract_wind_trace_metadata
from isp_trace_parser import input_validation, mappings, resource_trace_metadata
from isp_trace_parser.trace_restructure_helper_functions import (
check_filter_by_metadata,
filter_mapping_by_names_in_input_files,
Expand Down Expand Up @@ -135,7 +134,7 @@ def parse_wind_traces(
parsed_directory = input_validation.parsed_directory(parsed_directory)

files = get_all_filepaths(input_directory)
file_metadata = extract_metadata_for_all_wind_files(files)
file_metadata = resource_trace_metadata.build(files, version="2024")

resource_mapping = mappings.load("resources")
zone_name_mappings = {
Expand Down Expand Up @@ -320,16 +319,6 @@ def write_output_wind_zone_filename(metadata: dict) -> str:
return f"RefYear{m['reference_year']}_{name}_{m['resource_type']}.parquet"


def extract_metadata_for_all_wind_files(filepaths: list) -> dict:
    """
    Map every wind trace file to the metadata parsed from its filename.

    Returns a dict keyed by filepath whose values are the metadata dicts
    produced by extract_wind_trace_metadata for that file's name.
    """
    return {
        trace_path: extract_wind_trace_metadata(str(trace_path.name))
        for trace_path in filepaths
    }


def get_unique_resource_types_in_metadata(
metadata_for_trace_files: dict[str:str],
) -> list:
Expand Down
40 changes: 40 additions & 0 deletions tests/test_resource_trace_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pathlib import Path

import pytest

from isp_trace_parser import resource_trace_metadata


def test_build():
    """Check build() against one solar project file and one wind project file.

    A single test is enough to compare the lookup logic with the old regex
    approach: solar zones, wind zones and extra reference years are different
    YAML rows, not different code paths.
    """
    solar_project = Path("Adelaide_Desal_FFP_RefYear2011.csv")
    wind_project = Path("BLUFF1_RefYear2011.csv")

    metadata = resource_trace_metadata.build(
        [solar_project, wind_project], version="2024"
    )

    expected_solar = {
        "name": "Adelaide_Desal",
        "reference_year": 2011,
        "resource_type": "FFP",
        "file_type": "project",
    }
    assert metadata[solar_project] == expected_solar
    assert metadata[wind_project]["resource_type"] == "WIND"


@pytest.mark.parametrize(
    "filename",
    [
        "Adelaide_Desal.csv",  # missing _RefYear separator
        "Adelaide_Desal_RefYear.csv",  # missing year
        "Adelaide_Desal_RefYear2011a.csv",  # non-digit year
        "Mystery_Plant_RefYear2011.csv",  # stem not in mapping
    ],
)
def test_build_rejects_unexpected_filename(filename):
    """Each malformed or unmapped filename must be rejected with ValueError."""
    unexpected_file = Path(filename)
    with pytest.raises(ValueError, match="Unexpected trace filename"):
        resource_trace_metadata.build([unexpected_file], version="2024")
Loading
Loading