From 027d2be608b80886788f49475ff6edd1fb2499b7 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:37:33 +1000 Subject: [PATCH 01/11] Delete rename function (now use AEMO name for tmp parquets) --- src/isp_trace_parser/demand_traces.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 9703fe6..62a7e0c 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -203,9 +203,7 @@ def restructure_demand_file( trace = trace_formatter(trace) trace = _frame_with_metadata(trace, file_metadata) - save_filepath = output_directory / write_new_demand_filename( - metadata=file_metadata - ) + save_filepath = output_directory / input_filepath.with_suffix(".parquet").name save_filepath.parent.mkdir(parents=True, exist_ok=True) trace.write_parquet(save_filepath) @@ -241,20 +239,3 @@ def get_save_scenario_for_demand_trace( The mapped scenario name as a string. """ return demand_scenario_mapping[file_metadata["scenario"]] - - -def write_new_demand_filename(metadata: dict[str, str]) -> str: - """ - Generates the output filename for a demand trace file. - - Args: - metadata: Dictionary containing metadata for the demand trace file. - - Returns: - A string representing the filename. 
- """ - m = metadata - subregion = m["subregion"].replace(" ", "_") - scenario = m["scenario"].replace(" ", "_") - - return f"{scenario}_RefYear{m['reference_year']}_{subregion}_{m['poe']}_{m['demand_type']}.parquet" From 74da3f7784b555efe0070372e1e10184cf32d676 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:38:56 +1000 Subject: [PATCH 02/11] Renamed tests and test data (tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet -> tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet) --- ...11_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet} | Bin tests/test_get_data.py | 8 ++++---- tests/test_trace_parsers.py | 4 +--- 3 files changed, 5 insertions(+), 7 deletions(-) rename tests/test_data/output/{Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet => CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet} (100%) diff --git a/tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet b/tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet similarity index 100% rename from tests/test_data/output/Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet rename to tests/test_data/output/CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet diff --git a/tests/test_get_data.py b/tests/test_get_data.py index ddb1d0a..609226a 100644 --- a/tests/test_get_data.py +++ b/tests/test_get_data.py @@ -149,7 +149,7 @@ def test_get_demand_single_reference_year(parsed_trace_trace_directory: Path): test_df_lazy = pl.scan_parquet( TEST_DATA / "output" - / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" + / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" ) start_dt, end_dt = _year_range_to_dt_range(2023, 2024, year_type="fy") @@ -182,7 +182,7 @@ def test_get_demand_multiple_reference_year(parsed_trace_trace_directory: Path): test_df_lazy = pl.scan_parquet( 
TEST_DATA / "output" - / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" + / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" ) test_df = ( @@ -366,7 +366,7 @@ def test_demand_single_reference_year(parsed_trace_trace_directory: Path): test_df_lazy = pl.scan_parquet( TEST_DATA / "output" - / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" + / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" ) start_dt, end_dt = _year_range_to_dt_range(2023, 2024, year_type="fy") @@ -399,7 +399,7 @@ def test_demand_multiple_reference_years(parsed_trace_trace_directory: Path): test_df_lazy = pl.scan_parquet( TEST_DATA / "output" - / "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" + / "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" ) test_df = ( diff --git a/tests/test_trace_parsers.py b/tests/test_trace_parsers.py index c045908..ac72a08 100644 --- a/tests/test_trace_parsers.py +++ b/tests/test_trace_parsers.py @@ -14,9 +14,7 @@ def test_demand_trace_parsing(use_concurrency: bool): """Test demand trace parsing produces expected parquet output.""" test_demand_csv_directory = TEST_DATA / "demand" - expected_filename = ( - "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" - ) + expected_filename = "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" test_demand_output_parquet = TEST_DATA / "output" / expected_filename with tempfile.TemporaryDirectory() as tmp_parsed_directory: From 55101db004a21a4d5809c9a2520da1ff1464d337 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:46:35 +1000 Subject: [PATCH 03/11] Updated doc strings to reflect new output format --- src/isp_trace_parser/demand_traces.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 62a7e0c..1ca9653 100644 --- 
a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -70,17 +70,16 @@ def parse_demand_traces( contains metadata in the following format "_RefYear____.csv". For example, "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv". - The trace parser reformats the data and stores the data files in parquet format with metadata columns, - which match the IASR workbook conventions. + The trace parser reformats the data and stores the data files in parquet format with metadata columns. The data format is changed to include a column "datetime" specifying the end of the half hour period the measurement is for in the format %Y-%m-%d %H:%M:%S, a column "value" specifying the measurement value, and metadata columns (subregion, reference_year, scenario, poe, demand_type). The scenario - column contains the mapped scenario name from the IASR workbook. Files are saved with a new naming - convention: "_RefYear___.parquet". + column contains the mapped scenario name from the IASR workbook. Output files keep the AEMO input + stem, with the .csv suffix replaced by .parquet. For the CSV example above, the parsed filename would be: - "Green_Energy_Exports_RefYear2011_CNSW_POE10_OPSO_MODELLING.parquet" + "CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet" By default, all trace data in the input directory is parsed. However, a DemandMetadataFilter can be provided to filter the traces based on metadata. If a metadata type is present in the filter then only traces with a @@ -162,11 +161,13 @@ def restructure_demand_file( filters: DemandMetadataFilter | None = None, ) -> None: """ - Restructures a single demand trace file and saves it in a parquet format. + Restructures a single demand trace file and saves it as parquet. - This function processes a demand trace file, restructures and saves it in a new format, with the original - input filename stem and a .parquet extension. 
It handles the mapping of scenario names and applies filters - if provided. + The output filename is the AEMO input stem with the .csv suffix replaced by + .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv + becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet). + The scenario *column* is currently translated to the IASR workbook name; filters + are applied before any reading. Args: input_filepath: Path object representing the input demand trace file. From 1e77d40c18546f6fd7709ffabd6d55488d4530f1 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:48:13 +1000 Subject: [PATCH 04/11] Deleted one-line helper (redundant / doesn't make sense with current dict lookup) --- src/isp_trace_parser/demand_traces.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 1ca9653..c50df2a 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -194,9 +194,7 @@ def restructure_demand_file( """ file_metadata = dict(all_input_file_metadata[input_filepath]) - file_metadata["scenario"] = get_save_scenario_for_demand_trace( - file_metadata, demand_scenario_mapping - ) + file_metadata["scenario"] = demand_scenario_mapping[file_metadata["scenario"]] parse_file = check_filter_by_metadata(file_metadata, filters) if parse_file: From 8bb5d0572eb26e34158290f4748f9f0cb1130699 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:49:40 +1000 Subject: [PATCH 05/11] Deleted one-line helper (redundant / doesn't make sense with current dict lookup) --- src/isp_trace_parser/demand_traces.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index c50df2a..35fcb05 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -222,19 +222,3 @@ def 
_frame_with_metadata(trace: pl.DataFrame, file_metadata: dict) -> pl.DataFra poe=pl.lit(file_metadata["poe"]), demand_type=pl.lit(file_metadata["demand_type"]), ) - - -def get_save_scenario_for_demand_trace( - file_metadata: dict[str, str], demand_scenario_mapping: dict[str, str] -) -> str: - """ - Maps the raw scenario name to the IASR workbook scenario name. - - Args: - file_metadata: Dictionary containing metadata for the demand trace file. - demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. - - Returns: - The mapped scenario name as a string. - """ - return demand_scenario_mapping[file_metadata["scenario"]] From 8f1de4b5cd979c8afd0788edab5bf0cc8bde575c Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 14:53:22 +1000 Subject: [PATCH 06/11] Removed unnecessary test --- tests/test_writing_save_names.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/test_writing_save_names.py b/tests/test_writing_save_names.py index 6702326..3fc47a1 100644 --- a/tests/test_writing_save_names.py +++ b/tests/test_writing_save_names.py @@ -51,17 +51,3 @@ def test_write_wind_save_names(): ) assert str(save_filepath) == "RefYear1_a_x.parquet" - - -def test_write_demand_save_names(): - meta_data = { - "scenario": "a", - "reference_year": "1", - "subregion": "x", - "poe": "poe10", - "demand_type": "y", - } - - save_filepath = isp_trace_parser.demand_traces.write_new_demand_filename(meta_data) - - assert str(save_filepath) == "a_RefYear1_x_poe10_y.parquet" From da414310e7eb9f2d9b1a56b01c9daf2af98cf0a1 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 15:08:35 +1000 Subject: [PATCH 07/11] Updated partial function / restructure file function (to take metadata for single file, rather than whole dict) --- src/isp_trace_parser/demand_traces.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py 
b/src/isp_trace_parser/demand_traces.py index 35fcb05..8da9363 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -137,7 +137,6 @@ def parse_demand_traces( partial_func = functools.partial( restructure_demand_file, - all_input_file_metadata=file_metadata, demand_scenario_mapping=demand_scenario_mapping, output_directory=parsed_directory, filters=filters, @@ -146,16 +145,19 @@ def parse_demand_traces( if use_concurrency: max_workers = os.cpu_count() - 2 - Parallel(n_jobs=max_workers)(delayed(partial_func)(file) for file in files) + Parallel(n_jobs=max_workers)( + delayed(partial_func)(file, metadata) + for file, metadata in file_metadata.items() + ) else: - for file in files: - partial_func(file) + for file, metadata in file_metadata.items(): + partial_func(file, metadata) def restructure_demand_file( input_filepath: Path, - all_input_file_metadata: dict[Path, dict[str, str | int]], + file_metadata: dict[str, str | int], demand_scenario_mapping: dict[str, str], output_directory: Path, filters: DemandMetadataFilter | None = None, @@ -171,7 +173,7 @@ def restructure_demand_file( Args: input_filepath: Path object representing the input demand trace file. - all_input_file_metadata: Metadata for all input files. + file_metadata: Metadata for this trace file. demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. output_directory: Directory where restructured files will be saved. filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata. 
@@ -192,8 +194,6 @@ def restructure_demand_file( # This will process the input file and save it in parquet format in the specified output directory """ - file_metadata = dict(all_input_file_metadata[input_filepath]) - file_metadata["scenario"] = demand_scenario_mapping[file_metadata["scenario"]] parse_file = check_filter_by_metadata(file_metadata, filters) @@ -202,9 +202,7 @@ def restructure_demand_file( trace = trace_formatter(trace) trace = _frame_with_metadata(trace, file_metadata) - save_filepath = output_directory / input_filepath.with_suffix(".parquet").name - save_filepath.parent.mkdir(parents=True, exist_ok=True) - + save_filepath = output_directory / f"{input_filepath.stem}.parquet" trace.write_parquet(save_filepath) From 96351a99c0cc403bb9c765e7be22083af9d13bde Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 15:44:55 +1000 Subject: [PATCH 08/11] Added back in mkdir (erroneously removed) --- src/isp_trace_parser/demand_traces.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 8da9363..0bb3b36 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -129,6 +129,7 @@ def parse_demand_traces( """ input_directory = input_validation.input_directory(input_directory) parsed_directory = input_validation.parsed_directory(parsed_directory) + parsed_directory.mkdir(parents=True, exist_ok=True) files = get_all_filepaths(input_directory) file_metadata = demand_trace_metadata.build(files, version="2024") From 1e8ad5fdb8f86e8cc0e95ccba0ce43271180f875 Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Mon, 22 Jun 2026 15:53:22 +1000 Subject: [PATCH 09/11] Updated doc strings --- src/isp_trace_parser/demand_traces.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 0bb3b36..19901a0 100644 --- 
a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -169,12 +169,13 @@ def restructure_demand_file( The output filename is the AEMO input stem with the .csv suffix replaced by .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet). - The scenario *column* is currently translated to the IASR workbook name; filters - are applied before any reading. + The scenario column is translated to the IASR workbook name before filtering, + so filter values must use the IASR names. `file_metadata` is mutated in place + to hold the translated scenario (for now - may change). Args: input_filepath: Path object representing the input demand trace file. - file_metadata: Metadata for this trace file. + file_metadata: Metadata for this trace file (sceanrio name mutated in place). demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. output_directory: Directory where restructured files will be saved. filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata. @@ -185,10 +186,19 @@ def restructure_demand_file( Example: >>> input_filepath = Path('CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv') + >>> file_metadata = { + ... 'subregion': 'CNSW', + ... 'scenario': 'HYDROGEN_EXPORT', + ... 'poe': 'POE10', + ... 'demand_type': 'OPSO_MODELLING', + ... 'reference_year': 2011, + ... } + >>> demand_scenario_mapping = {'HYDROGEN_EXPORT': 'Green Energy Exports'} >>> restructure_demand_file( ... input_filepath=input_filepath, + ... file_metadata=file_metadata, ... demand_scenario_mapping=demand_scenario_mapping, ... output_directory='/path/to/output' ... 
) # doctest: +SKIP From 9a13ba85ab4579356b56c751ab3ab2968004d24b Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Tue, 23 Jun 2026 14:50:46 +1000 Subject: [PATCH 10/11] Minor update to doc strings --- src/isp_trace_parser/demand_traces.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 19901a0..9c27b1f 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -169,9 +169,9 @@ def restructure_demand_file( The output filename is the AEMO input stem with the .csv suffix replaced by .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet). - The scenario column is translated to the IASR workbook name before filtering, - so filter values must use the IASR names. `file_metadata` is mutated in place - to hold the translated scenario (for now - may change). + + `file_metadata` is changed in place to hold the translated scenario (for including + and storing within the dataframe). Args: input_filepath: Path object representing the input demand trace file. From d202c81d1a1562fee51103136b835b850a24968f Mon Sep 17 00:00:00 2001 From: Dylan McConnell Date: Wed, 24 Jun 2026 22:39:26 +1000 Subject: [PATCH 11/11] Updated docstrings (addresses code review comments from Nick) --- src/isp_trace_parser/demand_traces.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/isp_trace_parser/demand_traces.py b/src/isp_trace_parser/demand_traces.py index 9c27b1f..ab17556 100644 --- a/src/isp_trace_parser/demand_traces.py +++ b/src/isp_trace_parser/demand_traces.py @@ -75,7 +75,7 @@ def parse_demand_traces( the measurement is for in the format %Y-%m-%d %H:%M:%S, a column "value" specifying the measurement value, and metadata columns (subregion, reference_year, scenario, poe, demand_type). 
The scenario column contains the mapped scenario name from the IASR workbook. Output files keep the AEMO input - stem, with the .csv suffix replaced by .parquet. + filename, with the .csv suffix replaced by .parquet. For the CSV example above, the parsed filename would be: @@ -166,7 +166,7 @@ def restructure_demand_file( """ Restructures a single demand trace file and saves it as parquet. - The output filename is the AEMO input stem with the .csv suffix replaced by + The output filename is the AEMO input filename with the .csv suffix replaced by .parquet (e.g. CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.csv becomes CNSW_RefYear_2011_HYDROGEN_EXPORT_POE10_OPSO_MODELLING.parquet). @@ -175,7 +175,7 @@ def restructure_demand_file( Args: input_filepath: Path object representing the input demand trace file. - file_metadata: Metadata for this trace file (sceanrio name mutated in place). + file_metadata: Metadata for this trace file (scenario name mutated in place). demand_scenario_mapping: Dictionary mapping raw scenario names to IASR workbook scenario names. output_directory: Directory where restructured files will be saved. filters: DemandMetadataFilter or None, specifies which traces to parse based on metadata.