From 704fb8f18cd6ddb8035bd478bda3db9af054c39e Mon Sep 17 00:00:00 2001 From: Duda Nogueira Date: Thu, 14 May 2026 14:57:24 -0300 Subject: [PATCH 1/2] Fix aggregate metrics returning 0/0.0 instead of None for empty result sets - Fixed __parse_property_grpc() to check HasField() before accessing optional protobuf scalars - Integer/Number aggregates now correctly return None for unset metrics (maximum, mean, median, minimum, mode, sum_) - Date aggregates now correctly return None for unset metrics (maximum, median, minimum, mode) - Boolean aggregates now correctly return None for unset metrics (percentage_false, percentage_true, total_false, total_true) - Added integration test to verify None values for empty aggregate results - Aligns gRPC behavior with GraphQL API behavior Fixes #2036 --- integration/test_collection_aggregate.py | 55 +++++++++++++++++++ .../collections/aggregations/base_executor.py | 40 +++++++------- 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index 3aa9af54f..eac8356fd 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -884,3 +884,58 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None: object_limit=10, ) assert res.total_count == 4 + + +def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None: + """Test for issue #11219: aggregate metrics should be None for empty result sets, not 0/0.0""" + collection = collection_factory( + properties=[ + Property(name="bucket", data_type=DataType.INT, index_filterable=True, index_range_filters=True), + Property(name="intVal", data_type=DataType.INT), + Property(name="numberVal", data_type=DataType.NUMBER), + ], + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index(index_null_state=True), + ) + + if collection._connection._weaviate_version.is_lower_than(1, 29, 0): + pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher") + + # Insert one object with bucket=0 + collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78}) + + # Query with a filter that returns no results (bucket=99 doesn't exist) + res: AggregateReturn = collection.aggregate.over_all( + filters=Filter.by_property("bucket").equal(99), + total_count=True, + return_metrics=[ + Metrics("intVal").integer(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True), + Metrics("numberVal").number(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True), + ], + ) + + # Verify total_count is 0 + assert res.total_count == 0 + + # Verify integer metrics: count should be 0, all other metrics should be None (not 0) + int_metrics = res.properties["intVal"] + assert isinstance(int_metrics, AggregateInteger) + assert int_metrics.count == 0 + assert int_metrics.maximum is None, "maximum should be None for empty result set" + assert int_metrics.mean is None, "mean should be None for empty result set" + assert int_metrics.median is None, "median should be None for empty result set" + assert int_metrics.minimum is None, "minimum should be None for empty result set" + assert int_metrics.mode is None, "mode should be None for empty result set" + assert int_metrics.sum_ is None, "sum_ should be None for empty result set" + + # Verify number metrics: count should be 0, all other metrics should be None (not 0.0) + number_metrics = res.properties["numberVal"] + assert isinstance(number_metrics, AggregateNumber) + assert number_metrics.count == 0 + assert number_metrics.maximum is None, "maximum should be None for empty result set" + assert number_metrics.mean is None, "mean should be None for empty result set" + assert number_metrics.median is None, "median should be None for empty result set" + assert number_metrics.minimum is None, "minimum should be None for empty result set" + assert number_metrics.mode is None, "mode should be None for empty result set" + assert number_metrics.sum_ is None, "sum_ should be None for empty result set" + diff --git a/weaviate/collections/aggregations/base_executor.py b/weaviate/collections/aggregations/base_executor.py index d316fe76a..d3a98c0a6 100644 --- a/weaviate/collections/aggregations/base_executor.py +++ b/weaviate/collections/aggregations/base_executor.py @@ -269,38 +269,38 @@ def __parse_property_grpc( elif aggregation.HasField("int"): return AggregateInteger( count=aggregation.int.count, - maximum=aggregation.int.maximum, - mean=aggregation.int.mean, - median=aggregation.int.median, - minimum=aggregation.int.minimum, - mode=aggregation.int.mode, - sum_=aggregation.int.sum, + maximum=aggregation.int.maximum if aggregation.int.HasField("maximum") else None, + mean=aggregation.int.mean if aggregation.int.HasField("mean") else None, + median=aggregation.int.median if aggregation.int.HasField("median") else None, + minimum=aggregation.int.minimum if aggregation.int.HasField("minimum") else None, + mode=aggregation.int.mode if aggregation.int.HasField("mode") else None, + sum_=aggregation.int.sum if aggregation.int.HasField("sum") else None, ) elif aggregation.HasField("number"): return AggregateNumber( count=aggregation.number.count, - maximum=aggregation.number.maximum, - mean=aggregation.number.mean, - median=aggregation.number.median, - minimum=aggregation.number.minimum, - mode=aggregation.number.mode, - sum_=aggregation.number.sum, + maximum=aggregation.number.maximum if aggregation.number.HasField("maximum") else None, + mean=aggregation.number.mean if aggregation.number.HasField("mean") else None, + median=aggregation.number.median if aggregation.number.HasField("median") else None, + minimum=aggregation.number.minimum if aggregation.number.HasField("minimum") else None, + mode=aggregation.number.mode if aggregation.number.HasField("mode") else None, + sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None, ) elif aggregation.HasField("boolean"): return AggregateBoolean( count=aggregation.boolean.count, - percentage_false=aggregation.boolean.percentage_false, - percentage_true=aggregation.boolean.percentage_true, - total_false=aggregation.boolean.total_false, - total_true=aggregation.boolean.total_true, + percentage_false=aggregation.boolean.percentage_false if aggregation.boolean.HasField("percentage_false") else None, + percentage_true=aggregation.boolean.percentage_true if aggregation.boolean.HasField("percentage_true") else None, + total_false=aggregation.boolean.total_false if aggregation.boolean.HasField("total_false") else None, + total_true=aggregation.boolean.total_true if aggregation.boolean.HasField("total_true") else None, ) elif aggregation.HasField("date"): return AggregateDate( count=aggregation.date.count, - maximum=aggregation.date.maximum, - median=aggregation.date.median, - minimum=aggregation.date.minimum, - mode=aggregation.date.mode, + maximum=aggregation.date.maximum if aggregation.date.HasField("maximum") else None, + median=aggregation.date.median if aggregation.date.HasField("median") else None, + minimum=aggregation.date.minimum if aggregation.date.HasField("minimum") else None, + mode=aggregation.date.mode if aggregation.date.HasField("mode") else None, ) elif aggregation.HasField("reference"): return AggregateReference(pointing_to=list(aggregation.reference.pointing_to)) From 397cf5bb6cbf9401d27e2bbacff8c88e79b3223a Mon Sep 17 00:00:00 2001 From: Duda Nogueira Date: Thu, 14 May 2026 15:15:48 -0300 Subject: [PATCH 2/2] Fix pre-commit lint and formatting issues --- integration/test_collection_aggregate.py | 30 ++++++++++++------- .../collections/aggregations/base_executor.py | 24 +++++++++++---- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index eac8356fd..702e85ba0 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -887,36 +887,45 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None: def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None: - """Test for issue #11219: aggregate metrics should be None for empty result sets, not 0/0.0""" + """Test for issue #11219: aggregate metrics should be None for empty result sets, not 0/0.0.""" collection = collection_factory( properties=[ - Property(name="bucket", data_type=DataType.INT, index_filterable=True, index_range_filters=True), + Property( + name="bucket", + data_type=DataType.INT, + index_filterable=True, + index_range_filters=True, + ), Property(name="intVal", data_type=DataType.INT), Property(name="numberVal", data_type=DataType.NUMBER), ], vectorizer_config=Configure.Vectorizer.none(), inverted_index_config=Configure.inverted_index(index_null_state=True), ) - + if collection._connection._weaviate_version.is_lower_than(1, 29, 0): pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher") - + # Insert one object with bucket=0 collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78}) - + # Query with a filter that returns no results (bucket=99 doesn't exist) res: AggregateReturn = collection.aggregate.over_all( filters=Filter.by_property("bucket").equal(99), total_count=True, return_metrics=[ - Metrics("intVal").integer(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True), - Metrics("numberVal").number(count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True), + Metrics("intVal").integer( + count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True + ), + Metrics("numberVal").number( + count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True + ), ], ) - + # Verify total_count is 0 assert res.total_count == 0 - + # Verify integer metrics: count should be 0, all other metrics should be None (not 0) int_metrics = res.properties["intVal"] assert isinstance(int_metrics, AggregateInteger) @@ -927,7 +936,7 @@ def test_aggregate_empty_result_none_values(collection_factory: CollectionFactor assert int_metrics.minimum is None, "minimum should be None for empty result set" assert int_metrics.mode is None, "mode should be None for empty result set" assert int_metrics.sum_ is None, "sum_ should be None for empty result set" - + # Verify number metrics: count should be 0, all other metrics should be None (not 0.0) number_metrics = res.properties["numberVal"] assert isinstance(number_metrics, AggregateNumber) @@ -938,4 +947,3 @@ def test_aggregate_empty_result_none_values(collection_factory: CollectionFactor assert number_metrics.minimum is None, "minimum should be None for empty result set" assert number_metrics.mode is None, "mode should be None for empty result set" assert number_metrics.sum_ is None, "sum_ should be None for empty result set" - diff --git a/weaviate/collections/aggregations/base_executor.py b/weaviate/collections/aggregations/base_executor.py index d3a98c0a6..90cc72089 100644 --- a/weaviate/collections/aggregations/base_executor.py +++ b/weaviate/collections/aggregations/base_executor.py @@ -279,20 +279,32 @@ def __parse_property_grpc( elif aggregation.HasField("number"): return AggregateNumber( count=aggregation.number.count, - maximum=aggregation.number.maximum if aggregation.number.HasField("maximum") else None, + maximum=aggregation.number.maximum + if aggregation.number.HasField("maximum") + else None, mean=aggregation.number.mean if aggregation.number.HasField("mean") else None, median=aggregation.number.median if aggregation.number.HasField("median") else None, - minimum=aggregation.number.minimum if aggregation.number.HasField("minimum") else None, + minimum=aggregation.number.minimum + if aggregation.number.HasField("minimum") + else None, mode=aggregation.number.mode if aggregation.number.HasField("mode") else None, sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None, ) elif aggregation.HasField("boolean"): return AggregateBoolean( count=aggregation.boolean.count, - percentage_false=aggregation.boolean.percentage_false if aggregation.boolean.HasField("percentage_false") else None, - percentage_true=aggregation.boolean.percentage_true if aggregation.boolean.HasField("percentage_true") else None, - total_false=aggregation.boolean.total_false if aggregation.boolean.HasField("total_false") else None, - total_true=aggregation.boolean.total_true if aggregation.boolean.HasField("total_true") else None, + percentage_false=aggregation.boolean.percentage_false + if aggregation.boolean.HasField("percentage_false") + else None, + percentage_true=aggregation.boolean.percentage_true + if aggregation.boolean.HasField("percentage_true") + else None, + total_false=aggregation.boolean.total_false + if aggregation.boolean.HasField("total_false") + else None, + total_true=aggregation.boolean.total_true + if aggregation.boolean.HasField("total_true") + else None, ) elif aggregation.HasField("date"): return AggregateDate(