From 027d2be608b80886788f49475ff6edd1fb2499b7 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:37:33 +1000
Subject: [PATCH 01/11] Delete rename function (now use AEMO name for tmp
 parquets)

---
 src/isp_trace_parser/demand_traces.py | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 9703fe6..62a7e0c 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -203,9 +203,7 @@ def restructure_demand_file(
         trace = trace_formatter(trace)
         trace = _frame_with_metadata(trace, file_metadata)
 
-        save_filepath = output_directory / write_new_demand_filename(
-            metadata=file_metadata
-        )
+        save_filepath = output_directory / input_filepath.with_suffix(".parquet").name
         save_filepath.parent.mkdir(parents=True, exist_ok=True)
 
         trace.write_parquet(save_filepath)
@@ -241,20 +239,3 @@ def get_save_scenario_for_demand_trace(
         The mapped scenario name as a string.
     """
     return demand_scenario_mapping[file_metadata["scenario"]]
-
-
-def write_new_demand_filename(metadata: dict[str, str]) -> str:
-    """
-    Generates the output filename for a demand trace file.
-
-    Args:
-        metadata: Dictionary containing metadata for the demand trace file.
-
-    Returns:
-        A string representing the filename.
-    """
-    m = metadata
-    subregion = m["subregion"].replace(" ", "_")
-    scenario = m["scenario"].replace(" ", "_")
-
-    return f"{scenario}_RefYear{m['reference_year']}_{subregion}_{m['poe']}_{m['demand_type']}.parquet"

From 74da3f7784b555efe0070372e1e10184cf32d676 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:38:56 +1000
Subject: [PATCH 02/11] Renamed tests and test data

(tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet -> tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet)
---
 ...11_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet} | Bin
 tests/test_get_data.py                              |   8 ++++----
 tests/test_trace_parsers.py                         |   4 +---
 3 files changed, 5 insertions(+), 7 deletions(-)
 rename tests/test_data/output/{Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet => CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet} (100%)

diff --git a/tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet b/tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet
similarity index 100%
rename from tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet
rename to tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet
diff --git a/tests/test_get_data.py b/tests/test_get_data.py
index ddb1d0a..609226a 100644
--- a/tests/test_get_data.py
+++ b/tests/test_get_data.py
@@ -149,7 +149,7 @@ def test_get_demand_single_reference_year(parsed_trace_trace_directory: Path):
     test_df_lazy = pl.scan_parquet(
         TEST_DATA
         / "output"
-        / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
+        / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
     )
 
     start_dt, end_dt = _year_range_to_dt_range(2023, 2024, year_type="fy")
@@ -182,7 +182,7 @@ def test_get_demand_multiple_reference_year(parsed_trace_trace_directory: Path):
     test_df_lazy = pl.scan_parquet(
         TEST_DATA
         / "output"
-        / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
+        / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
     )
 
     test_df = (
@@ -366,7 +366,7 @@ def test_demand_single_reference_year(parsed_trace_trace_directory: Path):
     test_df_lazy = pl.scan_parquet(
         TEST_DATA
         / "output"
-        / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
+        / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
     )
 
     start_dt, end_dt = _year_range_to_dt_range(2023, 2024, year_type="fy")
@@ -399,7 +399,7 @@ def test_demand_multiple_reference_years(parsed_trace_trace_directory: Path):
     test_df_lazy = pl.scan_parquet(
         TEST_DATA
         / "output"
-        / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
+        / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
     )
 
     test_df = (
diff --git a/tests/test_trace_parsers.py b/tests/test_trace_parsers.py
index c045908..ac72a08 100644
--- a/tests/test_trace_parsers.py
+++ b/tests/test_trace_parsers.py
@@ -14,9 +14,7 @@
 def test_demand_trace_parsing(use_concurrency: bool):
     """Test demand trace parsing produces expected parquet output."""
     test_demand_csv_directory = TEST_DATA / "demand"
-    expected_filename = (
-        "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
-    )
+    expected_filename = "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
     test_demand_output_parquet = TEST_DATA / "output" / expected_filename
 
     with tempfile.TemporaryDirectory() as tmp_parsed_directory:

From 55101db004a21a4d5809c9a2520da1ff1464d337 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:46:35 +1000
Subject: [PATCH 03/11] Updated doc strings to reflect new output format

---
 src/isp_trace_parser/demand_traces.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 62a7e0c..1ca9653 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -70,17 +70,16 @@ def parse_demand_traces(
     contains metadata in the following format "<subregionID>_RefYear_<reference year>_<scenario>_<poe>_<data type>.csv".
     For example, "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv".
 
-    The trace parser reformats the data and stores the data files in parquet format with metadata columns,
-    which match the IASR workbook conventions.
+    The trace parser reformats the data and stores the data files in parquet format with metadata columns.
     The data format is changed to include a column "datetime" specifying the end of the half hour period
     the measurement is for in the format %Y-%m-%d %H:%M:%S, a column "value" specifying the measurement
     value, and metadata columns (subregion, reference_year, scenario, poe, demand_type). The scenario
-    column contains the mapped scenario name from the IASR workbook. Files are saved with a new naming
-    convention: "<mapped_scenario>_RefYear<reference_year>_<subregion>_<poe>_<demand_type>.parquet".
+    column contains the mapped scenario name from the IASR workbook. Output files keep the AEMO input
+    stem, with the .csv suffix replaced by .parquet.
 
     For the CSV example above, the parsed filename would be:
 
-        "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet"
+        "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet"
 
     By default, all trace data in the input directory is parsed. However, a DemandMetadataFilter can be provided
     to filter the traces based on metadata. If a metadata type is present in the filter then only traces with a
@@ -162,11 +161,13 @@ def restructure_demand_file(
     filters: DemandMetadataFilter | None = None,
 ) -> None:
     """
-    Restructures a single demand trace file and saves it in a parquet format.
+    Restructures a single demand trace file and saves it as parquet.
 
-    This function processes a demand trace file, restructures and saves it in a new format, with the original
-    input filename stem and a .parquet extension. It handles the mapping of scenario names and applies filters
-    if provided.
+    The output filename is the AEMO input stem with the .csv suffix replaced by
+    .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv
+    becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet).
+    The scenario *column* is currently  translated to the IASR workbook name; filters
+    are applied before any reading.
 
     Args:
         input_filepath: Path object representing the input demand trace file.

From 1e77d40c18546f6fd7709ffabd6d55488d4530f1 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:48:13 +1000
Subject: [PATCH 04/11] Deleted one-line helper (redundant / doesn't make sense
 with current dict lookup)

---
 src/isp_trace_parser/demand_traces.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 1ca9653..c50df2a 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -194,9 +194,7 @@ def restructure_demand_file(
     """
     file_metadata = dict(all_input_file_metadata[input_filepath])
 
-    file_metadata["scenario"] = get_save_scenario_for_demand_trace(
-        file_metadata, demand_scenario_mapping
-    )
+    file_metadata["scenario"] = demand_scenario_mapping[file_metadata["scenario"]]
 
     parse_file = check_filter_by_metadata(file_metadata, filters)
     if parse_file:

From 8bb5d0572eb26e34158290f4748f9f0cb1130699 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:49:40 +1000
Subject: [PATCH 05/11] Deleted now-unused one-line helper
 get_save_scenario_for_demand_trace (call inlined as a direct dict lookup)

---
 src/isp_trace_parser/demand_traces.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index c50df2a..35fcb05 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -222,19 +222,3 @@ def _frame_with_metadata(trace: pl.DataFrame, file_metadata: dict) -> pl.DataFra
         poe=pl.lit(file_metadata["poe"]),
         demand_type=pl.lit(file_metadata["demand_type"]),
     )
-
-
-def get_save_scenario_for_demand_trace(
-    file_metadata: dict[str, str], demand_scenario_mapping: dict[str, str]
-) -> str:
-    """
-    Maps the raw scenario name to the IASR workbook scenario name.
-
-    Args:
-        file_metadata: Dictionary containing metadata for the demand trace file.
-        demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names.
-
-    Returns:
-        The mapped scenario name as a string.
-    """
-    return demand_scenario_mapping[file_metadata["scenario"]]

From 8f1de4b5cd979c8afd0788edab5bf0cc8bde575c Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 14:53:22 +1000
Subject: [PATCH 06/11] Removed unnecessary test

---
 tests/test_writing_save_names.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tests/test_writing_save_names.py b/tests/test_writing_save_names.py
index 6702326..3fc47a1 100644
--- a/tests/test_writing_save_names.py
+++ b/tests/test_writing_save_names.py
@@ -51,17 +51,3 @@ def test_write_wind_save_names():
     )
 
     assert str(save_filepath) == "RefYear1_a_x.parquet"
-
-
-def test_write_demand_save_names():
-    meta_data = {
-        "scenario": "a",
-        "reference_year": "1",
-        "subregion": "x",
-        "poe": "poe10",
-        "demand_type": "y",
-    }
-
-    save_filepath = isp_trace_parser.demand_traces.write_new_demand_filename(meta_data)
-
-    assert str(save_filepath) == "a_RefYear1_x_poe10_y.parquet"

From da414310e7eb9f2d9b1a56b01c9daf2af98cf0a1 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:08:35 +1000
Subject: [PATCH 07/11] Updated partial function / restructure file function
 (to take metadata for single file, rather than whole dict)

---
 src/isp_trace_parser/demand_traces.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 35fcb05..8da9363 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -137,7 +137,6 @@ def parse_demand_traces(
 
     partial_func = functools.partial(
         restructure_demand_file,
-        all_input_file_metadata=file_metadata,
         demand_scenario_mapping=demand_scenario_mapping,
         output_directory=parsed_directory,
         filters=filters,
@@ -146,16 +145,19 @@ def parse_demand_traces(
     if use_concurrency:
         max_workers = os.cpu_count() - 2
 
-        Parallel(n_jobs=max_workers)(delayed(partial_func)(file) for file in files)
+        Parallel(n_jobs=max_workers)(
+            delayed(partial_func)(file, metadata)
+            for file, metadata in file_metadata.items()
+        )
 
     else:
-        for file in files:
-            partial_func(file)
+        for file, metadata in file_metadata.items():
+            partial_func(file, metadata)
 
 
 def restructure_demand_file(
     input_filepath: Path,
-    all_input_file_metadata: dict[Path, dict[str, str | int]],
+    file_metadata: dict[str, str | int],
     demand_scenario_mapping: dict[str, str],
     output_directory: Path,
     filters: DemandMetadataFilter | None = None,
@@ -171,7 +173,7 @@ def restructure_demand_file(
 
     Args:
         input_filepath: Path object representing the input demand trace file.
-        all_input_file_metadata: Metadata for all input files.
+        file_metadata: Metadata for this trace file.
         demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names.
         output_directory: Directory where restructured files will be saved.
         filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata.
@@ -192,8 +194,6 @@ def restructure_demand_file(
 
         # This will process the input file and save it in parquet format in the specified output directory
     """
-    file_metadata = dict(all_input_file_metadata[input_filepath])
-
     file_metadata["scenario"] = demand_scenario_mapping[file_metadata["scenario"]]
 
     parse_file = check_filter_by_metadata(file_metadata, filters)
@@ -202,9 +202,7 @@ def restructure_demand_file(
         trace = trace_formatter(trace)
         trace = _frame_with_metadata(trace, file_metadata)
 
-        save_filepath = output_directory / input_filepath.with_suffix(".parquet").name
-        save_filepath.parent.mkdir(parents=True, exist_ok=True)
-
+        save_filepath = output_directory / f"{input_filepath.stem}.parquet"
         trace.write_parquet(save_filepath)
 
 

From 96351a99c0cc403bb9c765e7be22083af9d13bde Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:44:55 +1000
Subject: [PATCH 08/11] Added back in mkdir (erroneously removed)

---
 src/isp_trace_parser/demand_traces.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 8da9363..0bb3b36 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -129,6 +129,7 @@ def parse_demand_traces(
     """
     input_directory = input_validation.input_directory(input_directory)
     parsed_directory = input_validation.parsed_directory(parsed_directory)
+    parsed_directory.mkdir(parents=True, exist_ok=True)
 
     files = get_all_filepaths(input_directory)
     file_metadata = demand_trace_metadata.build(files, version="2024")

From 1e8ad5fdb8f86e8cc0e95ccba0ce43271180f875 Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Mon, 22 Jun 2026 15:53:22 +1000
Subject: [PATCH 09/11] Updated doc strings

---
 src/isp_trace_parser/demand_traces.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 0bb3b36..19901a0 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -169,12 +169,13 @@ def restructure_demand_file(
     The output filename is the AEMO input stem with the .csv suffix replaced by
     .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv
     becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet).
-    The scenario *column* is currently  translated to the IASR workbook name; filters
-    are applied before any reading.
+    The scenario column is translated to the IASR workbook name before filtering,
+    so filter values must use the IASR names. `file_metadata` is mutated in place
+    to hold the translated scenario (for now - may change).
 
     Args:
         input_filepath: Path object representing the input demand trace file.
-        file_metadata: Metadata for this trace file.
+        file_metadata: Metadata for this trace file (sceanrio name mutated in place).
         demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names.
         output_directory: Directory where restructured files will be saved.
         filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata.
@@ -185,10 +186,19 @@ def restructure_demand_file(
     Example:
         >>> input_filepath = Path('CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv')
 
+        >>> file_metadata = {
+        ...     'subregion': 'CNSW',
+        ...     'scenario': 'HYDROGEN_EXPORT',
+        ...     'poe': 'POE10',
+        ...     'demand_type': 'OPSO_MODELLING',
+        ...     'reference_year': 2011,
+        ... }
+
         >>> demand_scenario_mapping = {'HYDROGEN_EXPORT': 'Green Energy Exports'}
 
         >>> restructure_demand_file(
         ...     input_filepath=input_filepath,
+        ...     file_metadata=file_metadata,
         ...     demand_scenario_mapping=demand_scenario_mapping,
         ...     output_directory='/path/to/output'
         ... )  # doctest: +SKIP

From 9a13ba85ab4579356b56c751ab3ab2968004d24b Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Tue, 23 Jun 2026 14:50:46 +1000
Subject: [PATCH 10/11] Minor update to doc strings

---
 src/isp_trace_parser/demand_traces.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 19901a0..9c27b1f 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -169,9 +169,9 @@ def restructure_demand_file(
     The output filename is the AEMO input stem with the .csv suffix replaced by
     .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv
     becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet).
-    The scenario column is translated to the IASR workbook name before filtering,
-    so filter values must use the IASR names. `file_metadata` is mutated in place
-    to hold the translated scenario (for now - may change).
+
+    `file_metadata` is changed in place to hold the translated scenario (for including
+    and storing within the dataframe).
 
     Args:
         input_filepath: Path object representing the input demand trace file.

From d202c81d1a1562fee51103136b835b850a24968f Mon Sep 17 00:00:00 2001
From: Dylan McConnell <dylanjmcconnell@users.noreply.github.com>
Date: Wed, 24 Jun 2026 22:39:26 +1000
Subject: [PATCH 11/11] Updated docstrings (addresses code review comments from
 Nick)

---
 src/isp_trace_parser/demand_traces.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py
index 9c27b1f..ab17556 100644
--- a/src/isp_trace_parser/demand_traces.py
+++ b/src/isp_trace_parser/demand_traces.py
@@ -75,7 +75,7 @@ def parse_demand_traces(
     the measurement is for in the format %Y-%m-%d %H:%M:%S, a column "value" specifying the measurement
     value, and metadata columns (subregion, reference_year, scenario, poe, demand_type). The scenario
     column contains the mapped scenario name from the IASR workbook. Output files keep the AEMO input
-    stem, with the .csv suffix replaced by .parquet.
+    filename, with the .csv suffix replaced by .parquet.
 
     For the CSV example above, the parsed filename would be:
 
@@ -166,7 +166,7 @@ def restructure_demand_file(
     """
     Restructures a single demand trace file and saves it as parquet.
 
-    The output filename is the AEMO input stem with the .csv suffix replaced by
+    The output filename is the AEMO input filename with the .csv suffix replaced by
     .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv
     becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet).
 
@@ -175,7 +175,7 @@ def restructure_demand_file(
 
     Args:
         input_filepath: Path object representing the input demand trace file.
-        file_metadata: Metadata for this trace file (sceanrio name mutated in place).
+        file_metadata: Metadata for this trace file (scenario name mutated in place).
         demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names.
         output_directory: Directory where restructured files will be saved.
         filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata.