Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions integration/test_collection_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,66 @@ def test_hybrid_bm25_operators(collection_factory: CollectionFactory) -> None:
object_limit=10,
)
assert res.total_count == 4


def test_aggregate_empty_result_none_values(collection_factory: CollectionFactory) -> None:
    """Regression test for issue #11219.

    When an aggregation runs over a filter that matches no objects, every
    requested metric other than ``count`` must be reported as ``None`` instead
    of ``0`` / ``0.0``; ``count`` itself must be ``0``.
    """
    schema = [
        Property(
            name="bucket",
            data_type=DataType.INT,
            index_filterable=True,
            index_range_filters=True,
        ),
        Property(name="intVal", data_type=DataType.INT),
        Property(name="numberVal", data_type=DataType.NUMBER),
    ]
    collection = collection_factory(
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
        inverted_index_config=Configure.inverted_index(index_null_state=True),
    )

    # The version is read from the collection's connection, so the skip check
    # can only happen once the collection exists.
    if collection._connection._weaviate_version.is_lower_than(1, 29, 0):
        pytest.skip("gRPC aggregates are only supported in versions 1.29.0 and higher")

    # A single object lives in bucket 0 ...
    collection.data.insert({"bucket": 0, "intVal": 123, "numberVal": 456.78})

    # ... while the aggregation only looks at bucket 99, which holds nothing.
    no_match = Filter.by_property("bucket").equal(99)
    every_metric = dict(
        count=True,
        maximum=True,
        mean=True,
        median=True,
        minimum=True,
        mode=True,
        sum_=True,
    )
    res: AggregateReturn = collection.aggregate.over_all(
        filters=no_match,
        total_count=True,
        return_metrics=[
            Metrics("intVal").integer(**every_metric),
            Metrics("numberVal").number(**every_metric),
        ],
    )

    # Nothing matched the filter.
    assert res.total_count == 0

    # Both the integer and the number property must come back with a zero
    # count and with every other metric unset (None rather than 0 / 0.0).
    nullable = ("maximum", "mean", "median", "minimum", "mode", "sum_")
    expectations = (
        ("intVal", AggregateInteger),
        ("numberVal", AggregateNumber),
    )
    for prop_name, result_type in expectations:
        metrics = res.properties[prop_name]
        assert isinstance(metrics, result_type)
        assert metrics.count == 0
        for name in nullable:
            reason = f"{name} should be None for empty result set"
            assert getattr(metrics, name) is None, reason
52 changes: 32 additions & 20 deletions weaviate/collections/aggregations/base_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,38 +269,50 @@ def __parse_property_grpc(
elif aggregation.HasField("int"):
return AggregateInteger(
count=aggregation.int.count,
maximum=aggregation.int.maximum,
mean=aggregation.int.mean,
median=aggregation.int.median,
minimum=aggregation.int.minimum,
mode=aggregation.int.mode,
sum_=aggregation.int.sum,
maximum=aggregation.int.maximum if aggregation.int.HasField("maximum") else None,
mean=aggregation.int.mean if aggregation.int.HasField("mean") else None,
median=aggregation.int.median if aggregation.int.HasField("median") else None,
minimum=aggregation.int.minimum if aggregation.int.HasField("minimum") else None,
mode=aggregation.int.mode if aggregation.int.HasField("mode") else None,
sum_=aggregation.int.sum if aggregation.int.HasField("sum") else None,
)
elif aggregation.HasField("number"):
return AggregateNumber(
count=aggregation.number.count,
maximum=aggregation.number.maximum,
mean=aggregation.number.mean,
median=aggregation.number.median,
minimum=aggregation.number.minimum,
mode=aggregation.number.mode,
sum_=aggregation.number.sum,
maximum=aggregation.number.maximum
if aggregation.number.HasField("maximum")
else None,
mean=aggregation.number.mean if aggregation.number.HasField("mean") else None,
median=aggregation.number.median if aggregation.number.HasField("median") else None,
minimum=aggregation.number.minimum
if aggregation.number.HasField("minimum")
else None,
mode=aggregation.number.mode if aggregation.number.HasField("mode") else None,
sum_=aggregation.number.sum if aggregation.number.HasField("sum") else None,
)
elif aggregation.HasField("boolean"):
return AggregateBoolean(
count=aggregation.boolean.count,
percentage_false=aggregation.boolean.percentage_false,
percentage_true=aggregation.boolean.percentage_true,
total_false=aggregation.boolean.total_false,
total_true=aggregation.boolean.total_true,
percentage_false=aggregation.boolean.percentage_false
if aggregation.boolean.HasField("percentage_false")
else None,
percentage_true=aggregation.boolean.percentage_true
if aggregation.boolean.HasField("percentage_true")
else None,
total_false=aggregation.boolean.total_false
if aggregation.boolean.HasField("total_false")
else None,
total_true=aggregation.boolean.total_true
if aggregation.boolean.HasField("total_true")
else None,
)
elif aggregation.HasField("date"):
return AggregateDate(
count=aggregation.date.count,
maximum=aggregation.date.maximum,
median=aggregation.date.median,
minimum=aggregation.date.minimum,
mode=aggregation.date.mode,
maximum=aggregation.date.maximum if aggregation.date.HasField("maximum") else None,
median=aggregation.date.median if aggregation.date.HasField("median") else None,
minimum=aggregation.date.minimum if aggregation.date.HasField("minimum") else None,
mode=aggregation.date.mode if aggregation.date.HasField("mode") else None,
)
elif aggregation.HasField("reference"):
return AggregateReference(pointing_to=list(aggregation.reference.pointing_to))
Expand Down
Loading