diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py
index 3aa9af54f..702e85ba0 100644
--- a/integration/test_collection_aggregate.py
+++ b/integration/test_collection_aggregate.py
@@ -884,3 +884,66 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None:
         object_limit=10,
     )
     assert res.total_count == 4
+
+
+def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None:
+    """Test for issue #11219: aggregate metrics should be None for empty result sets, not 0/0.0."""
+    collection = collection_factory(
+        properties=[
+            Property(
+                name="bucket",
+                data_type=DataType.INT,
+                index_filterable=True,
+                index_range_filters=True,
+            ),
+            Property(name="intVal", data_type=DataType.INT),
+            Property(name="numberVal", data_type=DataType.NUMBER),
+        ],
+        vectorizer_config=Configure.Vectorizer.none(),
+        inverted_index_config=Configure.inverted_index(index_null_state=True),
+    )
+
+    if collection._connection._weaviate_version.is_lower_than(1, 29, 0):
+        pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher")
+
+    # Insert one object with bucket=0
+    collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78})
+
+    # Query with a filter that returns no results (bucket=99 doesn't exist)
+    res: AggregateReturn = collection.aggregate.over_all(
+        filters=Filter.by_property("bucket").equal(99),
+        total_count=True,
+        return_metrics=[
+            Metrics("intVal").integer(
+                count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True
+            ),
+            Metrics("numberVal").number(
+                count=True, maximum=True, mean=True, median=True, minimum=True, mode=True, sum_=True
+            ),
+        ],
+    )
+
+    # Verify total_count is 0
+    assert res.total_count == 0
+
+    # Verify integer metrics: count should be 0, all other metrics should be None (not 0)
+    int_metrics = res.properties["intVal"]
+    assert isinstance(int_metrics, AggregateInteger)
+    assert int_metrics.count == 0
+    assert int_metrics.maximum is None, "maximum should be None for empty result set"
+    assert int_metrics.mean is None, "mean should be None for empty result set"
+    assert int_metrics.median is None, "median should be None for empty result set"
+    assert int_metrics.minimum is None, "minimum should be None for empty result set"
+    assert int_metrics.mode is None, "mode should be None for empty result set"
+    assert int_metrics.sum_ is None, "sum_ should be None for empty result set"
+
+    # Verify number metrics: count should be 0, all other metrics should be None (not 0.0)
+    number_metrics = res.properties["numberVal"]
+    assert isinstance(number_metrics, AggregateNumber)
+    assert number_metrics.count == 0
+    assert number_metrics.maximum is None, "maximum should be None for empty result set"
+    assert number_metrics.mean is None, "mean should be None for empty result set"
+    assert number_metrics.median is None, "median should be None for empty result set"
+    assert number_metrics.minimum is None, "minimum should be None for empty result set"
+    assert number_metrics.mode is None, "mode should be None for empty result set"
+    assert number_metrics.sum_ is None, "sum_ should be None for empty result set"
diff --git a/weaviate/collections/aggregations/base_executor.py b/weaviate/collections/aggregations/base_executor.py
index d316fe76a..90cc72089 100644
--- a/weaviate/collections/aggregations/base_executor.py
+++ b/weaviate/collections/aggregations/base_executor.py
@@ -269,38 +269,50 @@ def __parse_property_grpc(
         elif aggregation.HasField("int"):
             return AggregateInteger(
                 count=aggregation.int.count,
-                maximum=aggregation.int.maximum,
-                mean=aggregation.int.mean,
-                median=aggregation.int.median,
-                minimum=aggregation.int.minimum,
-                mode=aggregation.int.mode,
-                sum_=aggregation.int.sum,
+                maximum=aggregation.int.maximum if aggregation.int.HasField("maximum") else None,
+                mean=aggregation.int.mean if aggregation.int.HasField("mean") else None,
+                median=aggregation.int.median if aggregation.int.HasField("median") else None,
+                minimum=aggregation.int.minimum if aggregation.int.HasField("minimum") else None,
+                mode=aggregation.int.mode if aggregation.int.HasField("mode") else None,
+                sum_=aggregation.int.sum if aggregation.int.HasField("sum") else None,
             )
         elif aggregation.HasField("number"):
             return AggregateNumber(
                 count=aggregation.number.count,
-                maximum=aggregation.number.maximum,
-                mean=aggregation.number.mean,
-                median=aggregation.number.median,
-                minimum=aggregation.number.minimum,
-                mode=aggregation.number.mode,
-                sum_=aggregation.number.sum,
+                maximum=aggregation.number.maximum
+                if aggregation.number.HasField("maximum")
+                else None,
+                mean=aggregation.number.mean if aggregation.number.HasField("mean") else None,
+                median=aggregation.number.median if aggregation.number.HasField("median") else None,
+                minimum=aggregation.number.minimum
+                if aggregation.number.HasField("minimum")
+                else None,
+                mode=aggregation.number.mode if aggregation.number.HasField("mode") else None,
+                sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None,
             )
         elif aggregation.HasField("boolean"):
             return AggregateBoolean(
                 count=aggregation.boolean.count,
-                percentage_false=aggregation.boolean.percentage_false,
-                percentage_true=aggregation.boolean.percentage_true,
-                total_false=aggregation.boolean.total_false,
-                total_true=aggregation.boolean.total_true,
+                percentage_false=aggregation.boolean.percentage_false
+                if aggregation.boolean.HasField("percentage_false")
+                else None,
+                percentage_true=aggregation.boolean.percentage_true
+                if aggregation.boolean.HasField("percentage_true")
+                else None,
+                total_false=aggregation.boolean.total_false
+                if aggregation.boolean.HasField("total_false")
+                else None,
+                total_true=aggregation.boolean.total_true
+                if aggregation.boolean.HasField("total_true")
+                else None,
             )
         elif aggregation.HasField("date"):
             return AggregateDate(
                 count=aggregation.date.count,
-                maximum=aggregation.date.maximum,
-                median=aggregation.date.median,
-                minimum=aggregation.date.minimum,
-                mode=aggregation.date.mode,
+                maximum=aggregation.date.maximum if aggregation.date.HasField("maximum") else None,
+                median=aggregation.date.median if aggregation.date.HasField("median") else None,
+                minimum=aggregation.date.minimum if aggregation.date.HasField("minimum") else None,
+                mode=aggregation.date.mode if aggregation.date.HasField("mode") else None,
             )
         elif aggregation.HasField("reference"):
             return AggregateReference(pointing_to=list(aggregation.reference.pointing_to))