From d94319b4195a4cfd7526e1736ced513b654d0df8 Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Mon, 8 Jun 2026 15:24:56 -0700
Subject: [PATCH 1/2] Use a parquet-backed index in
 CalcitePPLAggregationIT.testSimpleCount0

A bare auto-created index isn't composite/parquet-backed, so on the
analytics-engine route it doesn't route to the analytics engine. Switch
to TEST_INDEX_BANK (loaded via loadIndex, which injects parquet settings
when the flag is set, 7 docs) so the test is meaningful on both routes.
Diagnosis by Sandesh Kumar.

Signed-off-by: Kai Huang
---
 .../calcite/remote/CalcitePPLAggregationIT.java | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java
index bd4c68b85d..5c8f6eb2cf 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java
@@ -25,7 +25,6 @@ import java.util.List;
 import org.json.JSONObject;
 import org.junit.jupiter.api.Test;
-import org.opensearch.client.Request;
 import org.opensearch.sql.common.utils.StringUtils;
 import org.opensearch.sql.exception.SemanticCheckException;
 import org.opensearch.sql.ppl.PPLIntegTestCase;
@@ -52,16 +51,11 @@ public void init() throws Exception {
 
   @Test
   public void testSimpleCount0() throws IOException {
-    Request request1 = new Request("PUT", "/test/_doc/1?refresh=true");
-    request1.setJsonEntity("{\"name\": \"hello\", \"age\": 20}");
-    client().performRequest(request1);
-    Request request2 = new Request("PUT", "/test/_doc/2?refresh=true");
-    request2.setJsonEntity("{\"name\": \"world\", \"age\": 30}");
-    client().performRequest(request2);
-
-    JSONObject actual = executeQuery("source=test | stats count() as c");
+    // A bare auto-created index isn't parquet-backed; use the parquet-aware bank index (7 docs).
+    JSONObject actual =
+        executeQuery(String.format("source=%s | stats count() as c", TEST_INDEX_BANK));
     verifySchema(actual, schema("c", "bigint"));
-    verifyDataRows(actual, rows(2));
+    verifyDataRows(actual, rows(7));
   }
 
   @Test

From 2a8ca85d68df5d328b6dca645249428551ae9990 Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Mon, 8 Jun 2026 15:24:57 -0700
Subject: [PATCH 2/2] Emit APPROX_COUNT_DISTINCT as the distinct_count_approx
 runtime name

distinct_count_approx() failed to bind on the analytics-engine
(DataFusion) route because the SqlAggFunction was named
DISTINCT_COUNT_APPROX; the backend resolves aggregates by the
Calcite/Substrait-standard name APPROX_COUNT_DISTINCT. The Java field
name and PPL function name are unchanged. The OpenSearch V3 path is
unaffected (it overrides this via the external HLL registration).
Analytics-route binding is completed by
opensearch-project/OpenSearch#22013. Per Sandesh Kumar.

Signed-off-by: Kai Huang
---
 .../sql/expression/function/PPLBuiltinOperators.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
index 60172e70c8..a24015de99 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
@@ -521,7 +521,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
   public static final SqlAggFunction DISTINCT_COUNT_APPROX =
       createUserDefinedAggFunction(
           DistinctCountApproxLogicalAggFunction.class,
-          "DISTINCT_COUNT_APPROX",
+          // Substrait-standard name the analytics-engine backend resolves by (V3 overrides it).
+          "APPROX_COUNT_DISTINCT",
           ReturnTypes.BIGINT_FORCE_NULLABLE,
           null);
 