diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index c5d4a2e9..0673cd65 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -1111,8 +1111,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "") assert results_dir.is_dir(), f"Expected directory {results_dir} not found" files_in_dir = list(results_dir.glob("*")) - assert len(files_in_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" + assert len(files_in_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" ) print(f"Found files in {results_dir}:") for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES: @@ -1133,6 +1133,25 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + import pandas as pd + + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + # Validate the execute command exited successfully assert result.exit_code == 0 diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 59ba189e..0fd367f0 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -354,7 +354,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back( @pytest.mark.flaky(retries=1, delay=5) @pytest.mark.timeout(timeout=60 * 10) @pytest.mark.sequential # Helps on Linux with image analysis step otherwise timing out -async def test_gui_run_download( # noqa: PLR0915 +async def test_gui_run_download( # noqa: PLR0914, PLR0915 user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property ) -> None: """Test that the user can download a run result via the GUI.""" @@ -440,8 +440,8 @@ async def test_gui_run_download( # noqa: PLR0915 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + assert len(files_in_results_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) @@ -464,6 +464,27 @@ async def test_gui_run_download( # noqa: PLR0915 f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + import json + + import pandas as pd + + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + @pytest.mark.integration @pytest.mark.sequential diff --git a/tests/constants_test.py b/tests/constants_test.py index 0296cb0d..aa18676e 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -83,15 +83,18 @@ # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) @@ -105,6 +108,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case "staging": @@ -124,15 +130,18 @@ # See production block above for instructions on how to update these sizes. SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) @@ -146,6 +155,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case _: