Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,25 @@ public void testDateBetween() throws IOException {
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
}

/**
 * Verifies that a timestamp lower bound combined with an {@code IN} filter on another field is
 * pushed down and still returns the matching row.
 */
@Test
public void testTimestampRangeWithInClausePushDown() throws IOException {
  // Two chained where-clauses: a timestamp range on birthdate, then an IN list on state.
  String ppl =
      String.format(
          "source=%s | where birthdate > timestamp('2018-06-01 00:00:00') | where state in"
              + " ('IL', 'TN', 'WA') | fields firstname, state, birthdate",
          TEST_INDEX_BANK);
  JSONObject response = executeQuery(ppl);

  verifySchema(
      response,
      schema("firstname", "string"),
      schema("state", "string"),
      schema("birthdate", "timestamp"));
  // Exactly one row is expected to satisfy both predicates.
  verifyDataRows(response, rows("Elinor", "WA", "2018-06-27 00:00:00"));
}

@Test
public void testXor() throws IOException {
JSONObject result =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Issue: https://github.com/opensearch-project/sql/issues/5481
# A timestamp range comparison AND'd with an IN clause on another field must push down and
# return rows. Calcite folds the IN into a Sarg and strips the timestamp literal's UDT; without
# the field-type-keyed fix the range query ships an unformatted date and the shard rejects it.
# Turns on the Calcite setting, then creates and populates the issue5481 fixture index.
setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true

# Single-shard, zero-replica index with a date field (event_time) and a keyword field (severity).
- do:
indices.create:
index: issue5481
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
event_time:
type: date
severity:
type: keyword

# Six documents at five-minute intervals (10:00Z through 10:25Z on 2026-05-28) with severities
# ERROR, WARN, INFO, ERROR, WARN, DEBUG in id order.
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "issue5481", "_id": "1"}}'
- '{"event_time": "2026-05-28T10:00:00Z", "severity": "ERROR"}'
- '{"index": {"_index": "issue5481", "_id": "2"}}'
- '{"event_time": "2026-05-28T10:05:00Z", "severity": "WARN"}'
- '{"index": {"_index": "issue5481", "_id": "3"}}'
- '{"event_time": "2026-05-28T10:10:00Z", "severity": "INFO"}'
- '{"index": {"_index": "issue5481", "_id": "4"}}'
- '{"event_time": "2026-05-28T10:15:00Z", "severity": "ERROR"}'
- '{"index": {"_index": "issue5481", "_id": "5"}}'
- '{"event_time": "2026-05-28T10:20:00Z", "severity": "WARN"}'
- '{"index": {"_index": "issue5481", "_id": "6"}}'
- '{"event_time": "2026-05-28T10:25:00Z", "severity": "DEBUG"}'

---
# Deletes the fixture index (ignore_unavailable is set, so a missing index is tolerated) and
# switches the Calcite setting that setup enabled back to false.
teardown:
- do:
indices.delete:
index: issue5481
ignore_unavailable: true
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false

---
# Combines a timestamp lower bound (event_time > 10:08:00) with a keyword IN list (ERROR, WARN).
# Of the six fixture documents only ids 4 (10:15 ERROR) and 5 (10:20 WARN) satisfy both
# predicates, so exactly two rows are expected, ordered by event_time.
"Issue 5481: timestamp range AND keyword IN pushes down and returns rows":
- skip:
features:
- headers
- do:
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=issue5481 | where event_time > timestamp('2026-05-28 10:08:00') | where severity in ('ERROR', 'WARN') | fields severity, event_time | sort event_time

# The assertions pin the row count, the result schema, and the two expected rows.
- match: { total: 2 }
- match: { schema: [ { name: severity, type: "string" }, { name: event_time, type: "timestamp" } ] }
- match: { datarows: [ [ "ERROR", "2026-05-28 10:15:00" ], [ "WARN", "2026-05-28 10:20:00" ] ] }
Original file line number Diff line number Diff line change
Expand Up @@ -1378,10 +1378,12 @@ public QueryExpression notLike(LiteralExpression literal) {

@Override
public QueryExpression equals(LiteralExpression literal) {
Object value = literal.value();
if (literal.isDateTime()) {
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
Object value = convertEndpointValue(literal.value(), isTimeStamp);
if (isTimeStamp) {
builder =
addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value).lte(value));
addFormatIfNecessary(
isTimeStamp, rangeQuery(getFieldReference()).gte(value).lte(value));
} else {
builder = termQuery(getFieldReferenceForTermQuery(), value);
}
Expand All @@ -1390,12 +1392,15 @@ public QueryExpression equals(LiteralExpression literal) {

@Override
public QueryExpression notEquals(LiteralExpression literal) {
Object value = literal.value();
if (literal.isDateTime()) {
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
Object value = convertEndpointValue(literal.value(), isTimeStamp);
if (isTimeStamp) {
builder =
boolQuery()
.should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value)))
.should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value)));
.should(
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value)))
.should(
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value)));
} else {
builder =
boolQuery()
Expand All @@ -1408,32 +1413,48 @@ public QueryExpression notEquals(LiteralExpression literal) {

/**
 * Builds a strictly-greater-than range query on this expression's field.
 *
 * @param literal the exclusive lower bound
 * @return this expression, with {@code builder} set to the range query
 */
@Override
public QueryExpression gt(LiteralExpression literal) {
  // Decide timestamp handling from the field type as well as the literal, then normalize the
  // endpoint and attach the date format with that same decision.
  boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
  Object value = convertEndpointValue(literal.value(), isTimeStamp);
  builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value));
  return this;
}

/**
 * Builds a greater-than-or-equal range query on this expression's field.
 *
 * @param literal the inclusive lower bound
 * @return this expression, with {@code builder} set to the range query
 */
@Override
public QueryExpression gte(LiteralExpression literal) {
  // Decide timestamp handling from the field type as well as the literal, then normalize the
  // endpoint and attach the date format with that same decision.
  boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
  Object value = convertEndpointValue(literal.value(), isTimeStamp);
  builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gte(value));
  return this;
}

/**
 * Builds a strictly-less-than range query on this expression's field.
 *
 * @param literal the exclusive upper bound
 * @return this expression, with {@code builder} set to the range query
 */
@Override
public QueryExpression lt(LiteralExpression literal) {
  // Decide timestamp handling from the field type as well as the literal, then normalize the
  // endpoint and attach the date format with that same decision.
  boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
  Object value = convertEndpointValue(literal.value(), isTimeStamp);
  builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value));
  return this;
}

/**
 * Builds a less-than-or-equal range query on this expression's field.
 *
 * @param literal the inclusive upper bound
 * @return this expression, with {@code builder} set to the range query
 */
@Override
public QueryExpression lte(LiteralExpression literal) {
  // Decide timestamp handling from the field type as well as the literal, then normalize the
  // endpoint and attach the date format with that same decision.
  boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
  Object value = convertEndpointValue(literal.value(), isTimeStamp);
  builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lte(value));
  return this;
}

/**
 * Decides whether a comparison should be handled as a timestamp/date range.
 *
 * <p>The literal's own date-time flag is checked first. If it is not set, the type of the field
 * being compared ({@code rel}) is consulted instead. That fallback matters because {@link
 * org.apache.calcite.rex.RexSimplify} can strip the literal's UDT (leaving VARCHAR) when a
 * sibling clause is folded into a {@code Sarg}, e.g. {@code @timestamp > X AND severityText IN
 * (...)}. Relying on {@code rel.isTimeStampType()} in that case preserves ISO-8601
 * normalization and the {@code "date_time"} format hint on the range query.
 *
 * @param literal the literal operand of the comparison
 * @return {@code true} if the literal is flagged as date-time, or if {@code rel} is non-null and
 *     reports a timestamp type
 */
private boolean isFieldOrLiteralDateTime(LiteralExpression literal) {
  if (literal.isDateTime()) {
    return true;
  }
  // The literal lost (or never had) its date-time type; fall back to the field's type.
  return rel != null && rel.isTimeStampType();
}

@Override
public QueryExpression match(String query, Map<String, String> optionalArguments) {
builder = new MatchQuery().build(getFieldReference(), query, optionalArguments);
Expand Down Expand Up @@ -1580,6 +1601,11 @@ public QueryExpression between(Range<?> range, boolean isTimeStamp) {
}

/**
 * Normalizes a comparison endpoint before it is placed into a pushed-down query.
 *
 * @param value the raw endpoint; may be {@code null}
 * @param isTimeStamp whether the endpoint belongs to a timestamp/date comparison
 * @return {@code null} for a {@code null} endpoint; otherwise the result of {@code
 *     sargPointValue}, additionally passed through {@code timestampValueForPushDown} (as a
 *     string) when {@code isTimeStamp} is set
 */
private Object convertEndpointValue(Object value, boolean isTimeStamp) {
  // This is the shared normalization entry point, so a null endpoint is returned as-is; that
  // keeps the toString() call in the timestamp branch from throwing an NPE. sargPointValue is
  // not expected to turn a non-null input into null.
  if (value == null) {
    return null;
  }
  Object point = sargPointValue(value);
  if (!isTimeStamp) {
    return point;
  }
  return timestampValueForPushDown(point.toString());
}
Expand Down Expand Up @@ -1712,16 +1738,19 @@ public static ScriptSortBuilder.ScriptSortType getScriptSortType(RelDataType rel
}

/**
* By default, range queries on date/time need use the format of the source to parse the literal.
* So we need to specify that the literal has "date_time" format
* Range queries on date/time fields need the source format to parse the literal, so we attach the
* {@code "date_time"} format. The caller resolves whether the comparison is a timestamp range
* from the field type (see {@link SimpleQueryExpression#isFieldOrLiteralDateTime}) rather than
* the literal's UDT, which {@link org.apache.calcite.rex.RexSimplify} can strip when a sibling
* clause is folded into a {@code Sarg}.
*
* @param literal literal value
* @param rangeQueryBuilder query builder to optionally add {@code format} expression
* @return existing builder with possible {@code format} attribute
* @param isTimeStamp whether the comparison endpoint is a timestamp/date range endpoint
* @param rangeQueryBuilder query builder to optionally add the {@code format} attribute
* @return the same builder, with {@code format("date_time")} added when {@code isTimeStamp}
*/
private static RangeQueryBuilder addFormatIfNecessary(
LiteralExpression literal, RangeQueryBuilder rangeQueryBuilder) {
if (literal.isDateTime()) {
boolean isTimeStamp, RangeQueryBuilder rangeQueryBuilder) {
if (isTimeStamp) {
rangeQueryBuilder.format("date_time");
}
return rangeQueryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,138 @@ void notEquals_generatesBoolQueryForDateTime() throws ExpressionNotAnalyzableExc
result.toString());
}

/**
* RexSimplify can strip the EXPR_TIMESTAMP UDT off a literal when a sibling clause is folded into
* a Sarg (e.g. {@code @timestamp > X AND severityText IN (...)}), leaving the literal as plain
* VARCHAR. The comparison must still emit a {@code format("date_time")} range query keyed off the
* field's type so the shard's default date parser accepts the value.
*/
@Test
void gt_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException {
Comment thread
RyanL1997 marked this conversation as resolved.
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
RexNode call = builder.makeCall(SqlStdOperatorTable.GREATER_THAN, field4, varcharLiteral);
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);

assertInstanceOf(RangeQueryBuilder.class, result);
assertEquals(
"""
{
"range" : {
"d" : {
"from" : "1987-02-03T04:34:56.000Z",
"to" : null,
"include_lower" : false,
"include_upper" : true,
"format" : "date_time",
"boost" : 1.0
}
}
}\
""",
result.toString());
}

// Companion stripped-VARCHAR-literal tests for the remaining range shapes (equals -> gte+lte,
// notEquals -> two-should bool, lte -> single range). Each must produce the same DSL as its
// intact-UDT counterpart, proving the field-type fallback in isFieldOrLiteralDateTime keeps
// ISO-8601 normalization + format("date_time") on every comparison op, not just gt. See #5481.
/**
 * An {@code =} comparison between {@code field4} and a plain string literal must produce a
 * single range query whose inclusive lower and upper bounds are the same ISO-8601 value, with the
 * {@code date_time} format attached.
 */
@Test
void equals_normalizesVarcharLiteralAgainstTimestampField()
    throws ExpressionNotAnalyzableException {
  RexLiteral plainStringLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
  RexNode comparison =
      builder.makeCall(SqlStdOperatorTable.EQUALS, field4, plainStringLiteral);

  QueryBuilder query = PredicateAnalyzer.analyze(comparison, schema, fieldTypes);

  String expectedDsl =
      """
      {
      "range" : {
      "d" : {
      "from" : "1987-02-03T04:34:56.000Z",
      "to" : "1987-02-03T04:34:56.000Z",
      "include_lower" : true,
      "include_upper" : true,
      "format" : "date_time",
      "boost" : 1.0
      }
      }
      }\
      """;
  assertInstanceOf(RangeQueryBuilder.class, query);
  assertEquals(expectedDsl, query.toString());
}

/**
 * A {@code <>} comparison between {@code field4} and a plain string literal must produce a bool
 * query with two {@code should} range clauses (strictly greater than, strictly less than), each
 * carrying the ISO-8601 value and the {@code date_time} format.
 */
@Test
void notEquals_normalizesVarcharLiteralAgainstTimestampField()
    throws ExpressionNotAnalyzableException {
  RexLiteral plainStringLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
  RexNode comparison =
      builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, field4, plainStringLiteral);

  QueryBuilder query = PredicateAnalyzer.analyze(comparison, schema, fieldTypes);

  String expectedDsl =
      """
      {
      "bool" : {
      "should" : [
      {
      "range" : {
      "d" : {
      "from" : "1987-02-03T04:34:56.000Z",
      "to" : null,
      "include_lower" : false,
      "include_upper" : true,
      "format" : "date_time",
      "boost" : 1.0
      }
      }
      },
      {
      "range" : {
      "d" : {
      "from" : null,
      "to" : "1987-02-03T04:34:56.000Z",
      "include_lower" : true,
      "include_upper" : false,
      "format" : "date_time",
      "boost" : 1.0
      }
      }
      }
      ],
      "adjust_pure_negative" : true,
      "boost" : 1.0
      }
      }\
      """;
  assertInstanceOf(BoolQueryBuilder.class, query);
  assertEquals(expectedDsl, query.toString());
}

/**
 * A {@code <=} comparison between {@code field4} and a plain string literal must produce a
 * single range query with an inclusive ISO-8601 upper bound and the {@code date_time} format.
 */
@Test
void lte_normalizesVarcharLiteralAgainstTimestampField()
    throws ExpressionNotAnalyzableException {
  RexLiteral plainStringLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
  RexNode comparison =
      builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, field4, plainStringLiteral);

  QueryBuilder query = PredicateAnalyzer.analyze(comparison, schema, fieldTypes);

  String expectedDsl =
      """
      {
      "range" : {
      "d" : {
      "from" : null,
      "to" : "1987-02-03T04:34:56.000Z",
      "include_lower" : true,
      "include_upper" : true,
      "format" : "date_time",
      "boost" : 1.0
      }
      }
      }\
      """;
  assertInstanceOf(RangeQueryBuilder.class, query);
  assertEquals(expectedDsl, query.toString());
}

@Test
void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzableException {
RexNode call =
Expand Down
Loading