From 6179b13660d9365ed33548a12c91749264fb6926 Mon Sep 17 00:00:00 2001 From: Clemens Portele Date: Tue, 16 Jun 2026 23:27:03 +0200 Subject: [PATCH] fix slow single-value array-contains filter on multi-valued properties A_CONTAINS on a multi-valued property is encoded as ... IN (<values>) GROUP BY <sort key> HAVING count(distinct <column>) = <n> For a single value n is 1 and the HAVING is tautological: every group selected by IN (<value>) trivially has one distinct value, so the predicate is always true. PostgreSQL has no selectivity estimate for a post-aggregate count(distinct) filter and falls back to a tiny default row count. That misestimate can drive a nested-loop plan that re-evaluates other predicates per row (for example a large id membership set), making the query extremely slow. Drop the redundant HAVING for the single-value case (equivalent to the A_OVERLAPS encoding) so the planner sees an accurate grouped-row count. Multi-value filters keep the HAVING, which remains semantically required. --- .../cql/app/CqlFilterExamples.java | 4 ++ .../features/sql/app/FilterEncoderSql.java | 44 +++++++++++++------ .../sql/app/FilterEncoderSqlSpec.groovy | 17 +++++++ 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/xtraplatform-cql/src/testFixtures/java/de/ii/xtraplatform/cql/app/CqlFilterExamples.java b/xtraplatform-cql/src/testFixtures/java/de/ii/xtraplatform/cql/app/CqlFilterExamples.java index 9aaec4e69..6ce08b46f 100644 --- a/xtraplatform-cql/src/testFixtures/java/de/ii/xtraplatform/cql/app/CqlFilterExamples.java +++ b/xtraplatform-cql/src/testFixtures/java/de/ii/xtraplatform/cql/app/CqlFilterExamples.java @@ -993,6 +993,10 @@ public class CqlFilterExamples { Property.of("location"), ArrayLiteral.of(ImmutableList.of(ScalarLiteral.of("id"), ScalarLiteral.of("location")))); + public static final Cql2Expression EXAMPLE_AContains_SingleValue_ValidFor_JOINED_GEOMETRY = + AContains.of( + Property.of("location"), ArrayLiteral.of(ImmutableList.of(ScalarLiteral.of("id")))); + public static 
final Cql2Expression EXAMPLE_AEquals_ValidFor_JOINED_GEOMETRY = AEquals.of( Property.of("location"), diff --git a/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java b/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java index a9d3a0bbf..c0babff6b 100644 --- a/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java +++ b/xtraplatform-features-sql/src/main/java/de/ii/xtraplatform/features/sql/app/FilterEncoderSql.java @@ -1135,14 +1135,22 @@ public String visit(BinaryArrayOperation arrayOperation, List children) if (notInverse ? arrayOperation.getArrayOperator() == A_CONTAINS : arrayOperation.getArrayOperator() == A_CONTAINEDBY) { + // for a single required value the `HAVING count(distinct …) = 1` is tautological (every + // group selected by `IN (<value>)` trivially has one distinct value); dropping it + // keeps the result identical but lets the planner estimate the grouped row count, which + // it cannot do across a count(distinct) HAVING filter String arrayQuery = - String.format( - " IN %1$s GROUP BY %2$s.%3$s HAVING count(distinct %4$s) = %5$s", - secondExpression, - aliases.get(0), - rootSchema.getSortKey().get(), - qualifiedColumn.first(), - elementCount); + elementCount == 1 + ? String.format( + " IN %1$s GROUP BY %2$s.%3$s", + secondExpression, aliases.get(0), rootSchema.getSortKey().get()) + : String.format( + " IN %1$s GROUP BY %2$s.%3$s HAVING count(distinct %4$s) = %5$s", + secondExpression, + aliases.get(0), + rootSchema.getSortKey().get(), + qualifiedColumn.first(), + elementCount); return String.format(mainExpression, "", arrayQuery); } else if (arrayOperation.getArrayOperator() == A_EQUALS) { String arrayQuery = @@ -2118,14 +2126,22 @@ public String visit(BinaryArrayOperation arrayOperation, List children) if (notInverse ? 
arrayOperation.getArrayOperator() == A_CONTAINS : arrayOperation.getArrayOperator() == A_CONTAINEDBY) { + // for a single required value the `HAVING count(distinct …) = 1` is tautological (every + // group selected by `IN (<value>)` trivially has one distinct value); dropping it + // keeps the result identical but lets the planner estimate the grouped row count, which + // it cannot do across a count(distinct) HAVING filter String arrayQuery = - String.format( - " IN %1$s GROUP BY %2$s.%3$s HAVING count(distinct %4$s) = %5$s", - secondExpression, - aliases.get(0), - mapping.getMainTable().getSortKey(), - qualifiedColumn.first(), - elementCount); + elementCount == 1 + ? String.format( + " IN %1$s GROUP BY %2$s.%3$s", + secondExpression, aliases.get(0), mapping.getMainTable().getSortKey()) + : String.format( + " IN %1$s GROUP BY %2$s.%3$s HAVING count(distinct %4$s) = %5$s", + secondExpression, + aliases.get(0), + mapping.getMainTable().getSortKey(), + qualifiedColumn.first(), + elementCount); return String.format(mainExpression, "", arrayQuery); } else if (arrayOperation.getArrayOperator() == A_EQUALS) { String arrayQuery = diff --git a/xtraplatform-features-sql/src/test/groovy/de/ii/xtraplatform/features/sql/app/FilterEncoderSqlSpec.groovy b/xtraplatform-features-sql/src/test/groovy/de/ii/xtraplatform/features/sql/app/FilterEncoderSqlSpec.groovy index 9e7c15986..cc4a78655 100644 --- a/xtraplatform-features-sql/src/test/groovy/de/ii/xtraplatform/features/sql/app/FilterEncoderSqlSpec.groovy +++ b/xtraplatform-features-sql/src/test/groovy/de/ii/xtraplatform/features/sql/app/FilterEncoderSqlSpec.groovy @@ -714,6 +714,23 @@ class FilterEncoderSqlSpec extends Specification { } + def 'test AContains with a single value omits the tautological HAVING'() { + + given: + def instanceContainer = QuerySchemaFixtures.JOINED_GEOMETRY + def filter = CqlFilterExamples.EXAMPLE_AContains_SingleValue_ValidFor_JOINED_GEOMETRY + + when: + String expected = "A.id IN (SELECT AA.id FROM building 
AA JOIN geometry AB ON (AA.id=AB.id) WHERE AB.location IN ('id') GROUP BY AA.id)" + + String actual = filterEncoder.encode(filter, instanceContainer) + + then: + + actual == expected + + } + def 'test AContains with not'() { given: