From 835f57ee3434f3a827ea62a3198aade62d79a795 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 12 May 2026 01:15:51 +0200 Subject: [PATCH 1/2] feat(tests): add parquet size checks and GeoJSON parity validation for HETA 1.2.0 --- tests/aignostics/application/cli_test.py | 23 +++++++++++++++++++++-- tests/constants_test.py | 6 ++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index c5d4a2e9..0673cd65 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -1111,8 +1111,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "") assert results_dir.is_dir(), f"Expected directory {results_dir} not found" files_in_dir = list(results_dir.glob("*")) - assert len(files_in_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" + assert len(files_in_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" ) print(f"Found files in {results_dir}:") for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES: @@ -1133,6 +1133,25 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + import pandas as pd + + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + # Validate the execute command exited successfully assert result.exit_code == 0 diff --git a/tests/constants_test.py b/tests/constants_test.py index 0296cb0d..9aec7ee3 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -105,6 +105,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case "staging": @@ -146,6 +149,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case _: From 4bf84bb6c4814618aaa26a4e270db9dadf2d59e1 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 12 May 2026 09:44:17 +0200 Subject: [PATCH 2/2] chore(tests): add HETA 1.2.0 parquet outputs to SPOT_0 and update gui_test file count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPOT_0_EXPECTED_RESULT_FILES updated with 3 new parquet artifacts (tissue_qc, tissue_segmentation, cell_classification) from a HETA 1.2.0 run. gui_test updated to assert 12 result files and validate parquet↔GeoJSON row count parity for all 3 paired outputs. --- tests/aignostics/application/gui_test.py | 27 +++++++++++++-- tests/constants_test.py | 42 ++++++++++++++---------- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 59ba189e..0fd367f0 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -354,7 +354,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back( @pytest.mark.flaky(retries=1, delay=5) @pytest.mark.timeout(timeout=60 * 10) @pytest.mark.sequential # Helps on Linux with image analysis step otherwise timing out -async def test_gui_run_download( # noqa: PLR0915 +async def test_gui_run_download( # noqa: PLR0914, PLR0915 user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property ) -> None: """Test that the user can download a run result via the GUI.""" @@ -440,8 +440,8 @@ async def test_gui_run_download( # noqa: PLR0915 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + assert len(files_in_results_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) @@ -464,6 +464,27 @@ async def test_gui_run_download( # noqa: PLR0915 f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + import json + + import pandas as pd + + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + @pytest.mark.integration @pytest.mark.sequential diff --git a/tests/constants_test.py b/tests/constants_test.py index 9aec7ee3..aa18676e 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -83,15 +83,18 @@ # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) @@ -127,15 +130,18 @@ # See production block above for instructions on how to update these sizes. SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)