diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java index 4c2c8176690..75e98da9ee6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java @@ -512,6 +512,25 @@ public void testDateBetween() throws IOException { actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00")); } + /** + * A timestamp range comparison AND'd with an {@code IN} clause must push down and return rows. + */ + @Test + public void testTimestampRangeWithInClausePushDown() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | where birthdate > timestamp('2018-06-01 00:00:00') | where state in" + + " ('IL', 'TN', 'WA') | fields firstname, state, birthdate", + TEST_INDEX_BANK)); + verifySchema( + actual, + schema("firstname", "string"), + schema("state", "string"), + schema("birthdate", "timestamp")); + verifyDataRows(actual, rows("Elinor", "WA", "2018-06-27 00:00:00")); + } + @Test public void testXor() throws IOException { JSONObject result = diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5481.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5481.yml new file mode 100644 index 00000000000..4207a6c3005 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5481.yml @@ -0,0 +1,69 @@ +# Issue: https://github.com/opensearch-project/sql/issues/5481 +# A timestamp range comparison AND'd with an IN clause on another field must push down and +# return rows. Calcite folds the IN into a Sarg and strips the timestamp literal's UDT; without +# the field-type-keyed fix the range query ships an unformatted date and the shard rejects it. +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5481 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + event_time: + type: date + severity: + type: keyword + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5481", "_id": "1"}}' + - '{"event_time": "2026-05-28T10:00:00Z", "severity": "ERROR"}' + - '{"index": {"_index": "issue5481", "_id": "2"}}' + - '{"event_time": "2026-05-28T10:05:00Z", "severity": "WARN"}' + - '{"index": {"_index": "issue5481", "_id": "3"}}' + - '{"event_time": "2026-05-28T10:10:00Z", "severity": "INFO"}' + - '{"index": {"_index": "issue5481", "_id": "4"}}' + - '{"event_time": "2026-05-28T10:15:00Z", "severity": "ERROR"}' + - '{"index": {"_index": "issue5481", "_id": "5"}}' + - '{"event_time": "2026-05-28T10:20:00Z", "severity": "WARN"}' + - '{"index": {"_index": "issue5481", "_id": "6"}}' + - '{"event_time": "2026-05-28T10:25:00Z", "severity": "DEBUG"}' + +--- +teardown: + - do: + indices.delete: + index: issue5481 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5481: timestamp range AND keyword IN pushes down and returns rows": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5481 | where event_time > timestamp('2026-05-28 10:08:00') | where severity in ('ERROR', 'WARN') | fields severity, event_time | sort event_time + + - match: { total: 2 } + - match: { schema: [ { name: severity, type: "string" }, { name: event_time, type: "timestamp" } ] } + - match: { datarows: [ [ "ERROR", "2026-05-28 10:15:00" ], [ "WARN", "2026-05-28 10:20:00" ] ] } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 6389fb28395..6aba8fba2ec 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -1378,10 +1378,12 @@ public QueryExpression notLike(LiteralExpression literal) { @Override public QueryExpression equals(LiteralExpression literal) { - Object value = literal.value(); - if (literal.isDateTime()) { + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + if (isTimeStamp) { builder = - addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value).lte(value)); + addFormatIfNecessary( + isTimeStamp, rangeQuery(getFieldReference()).gte(value).lte(value)); } else { builder = termQuery(getFieldReferenceForTermQuery(), value); } @@ -1390,12 +1392,15 @@ public QueryExpression equals(LiteralExpression literal) { @Override public QueryExpression notEquals(LiteralExpression literal) { - Object value = literal.value(); - if (literal.isDateTime()) { + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + if (isTimeStamp) { builder = boolQuery() - .should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value))) - .should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value))); + .should( + addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value))) + .should( + addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value))); } else { builder = boolQuery() @@ -1408,32 +1413,48 @@ public QueryExpression notEquals(LiteralExpression literal) { @Override public QueryExpression gt(LiteralExpression literal) { - Object value = literal.value(); - builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value)); + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value)); return this; } @Override public QueryExpression gte(LiteralExpression literal) { - Object value = literal.value(); - builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value)); + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gte(value)); return this; } @Override public QueryExpression lt(LiteralExpression literal) { - Object value = literal.value(); - builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value)); + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value)); return this; } @Override public QueryExpression lte(LiteralExpression literal) { - Object value = literal.value(); - builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lte(value)); + boolean isTimeStamp = isFieldOrLiteralDateTime(literal); + Object value = convertEndpointValue(literal.value(), isTimeStamp); + builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lte(value)); return this; } + /** + * Whether the comparison is a timestamp/date range. The field type is the reliable signal: + * {@code literal.isDateTime()} reads the literal's UDT, which {@link + * org.apache.calcite.rex.RexSimplify} can strip (to VARCHAR) when a sibling clause is folded + * into a {@code Sarg}, e.g. {@code @timestamp > X AND severityText IN (...)}. Falling back to + * {@code rel.isTimeStampType()} keeps ISO-8601 normalization and the {@code "date_time"} format + * hint on the range query. + */ + private boolean isFieldOrLiteralDateTime(LiteralExpression literal) { + return literal.isDateTime() || (rel != null && rel.isTimeStampType()); + } + @Override public QueryExpression match(String query, Map optionalArguments) { builder = new MatchQuery().build(getFieldReference(), query, optionalArguments); @@ -1580,6 +1601,11 @@ public QueryExpression between(Range range, boolean isTimeStamp) { } private Object convertEndpointValue(Object value, boolean isTimeStamp) { + // Shared normalization entry point: guard a null endpoint so the timestamp branch's + // value.toString() cannot NPE. sargPointValue never produces null from a non-null input. + if (value == null) { + return null; + } value = sargPointValue(value); return isTimeStamp ? timestampValueForPushDown(value.toString()) : value; } @@ -1712,16 +1738,19 @@ public static ScriptSortBuilder.ScriptSortType getScriptSortType(RelDataType rel } /** - * By default, range queries on date/time need use the format of the source to parse the literal. - * So we need to specify that the literal has "date_time" format + * Range queries on date/time fields need the source format to parse the literal, so we attach the + * {@code "date_time"} format. The caller resolves whether the comparison is a timestamp range + * from the field type (see {@link SimpleQueryExpression#isFieldOrLiteralDateTime}) rather than + * the literal's UDT, which {@link org.apache.calcite.rex.RexSimplify} can strip when a sibling + * clause is folded into a {@code Sarg}. * - * @param literal literal value - * @param rangeQueryBuilder query builder to optionally add {@code format} expression - * @return existing builder with possible {@code format} attribute + * @param isTimeStamp whether the comparison endpoint is a timestamp/date range endpoint + * @param rangeQueryBuilder query builder to optionally add the {@code format} attribute + * @return the same builder, with {@code format("date_time")} added when {@code isTimeStamp} */ private static RangeQueryBuilder addFormatIfNecessary( - LiteralExpression literal, RangeQueryBuilder rangeQueryBuilder) { - if (literal.isDateTime()) { + boolean isTimeStamp, RangeQueryBuilder rangeQueryBuilder) { + if (isTimeStamp) { rangeQueryBuilder.format("date_time"); } return rangeQueryBuilder; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 3deb39d58c0..041063f62a0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -1147,6 +1147,138 @@ void notEquals_generatesBoolQueryForDateTime() throws ExpressionNotAnalyzableExc result.toString()); } + /** + * RexSimplify can strip the EXPR_TIMESTAMP UDT off a literal when a sibling clause is folded into + * a Sarg (e.g. {@code @timestamp > X AND severityText IN (...)}), leaving the literal as plain + * VARCHAR. The comparison must still emit a {@code format("date_time")} range query keyed off the + * field's type so the shard's default date parser accepts the value. + */ + @Test + void gt_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException { + RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56"); + RexNode call = builder.makeCall(SqlStdOperatorTable.GREATER_THAN, field4, varcharLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(RangeQueryBuilder.class, result); + assertEquals( + """ + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : null, + "include_lower" : false, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + + // Companion stripped-VARCHAR-literal tests for the remaining range shapes (equals -> gte+lte, + // notEquals -> two-should bool, lte -> single range). Each must produce the same DSL as its + // intact-UDT counterpart, proving the field-type fallback in isFieldOrLiteralDateTime keeps + // ISO-8601 normalization + format("date_time") on every comparison op, not just gt. See #5481. + @Test + void equals_normalizesVarcharLiteralAgainstTimestampField() + throws ExpressionNotAnalyzableException { + RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56"); + RexNode call = builder.makeCall(SqlStdOperatorTable.EQUALS, field4, varcharLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(RangeQueryBuilder.class, result); + assertEquals( + """ + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : "1987-02-03T04:34:56.000Z", + "include_lower" : true, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + + @Test + void notEquals_normalizesVarcharLiteralAgainstTimestampField() + throws ExpressionNotAnalyzableException { + RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56"); + RexNode call = builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, field4, varcharLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "should" : [ + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : null, + "include_lower" : false, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }, + { + "range" : { + "d" : { + "from" : null, + "to" : "1987-02-03T04:34:56.000Z", + "include_lower" : true, + "include_upper" : false, + "format" : "date_time", + "boost" : 1.0 + } + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } + + @Test + void lte_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException { + RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56"); + RexNode call = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, field4, varcharLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(RangeQueryBuilder.class, result); + assertEquals( + """ + { + "range" : { + "d" : { + "from" : null, + "to" : "1987-02-03T04:34:56.000Z", + "include_lower" : true, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + @Test void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzableException { RexNode call =