diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 91a30361a20..68f389363b3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1889,10 +1889,22 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { } } else { // The join-with-criteria grammar doesn't allow empty join condition - RexNode joinCondition = - node.getJoinCondition() - .map(c -> rexVisitor.analyzeJoinCondition(c, context)) - .orElse(context.relBuilder.literal(true)); + RexNode joinCondition; + List bareFields = new ArrayList<>(); + if (JoinAndLookupUtils.collectBareFields(node.getJoinCondition().get(), bareFields)) { + // Bare-field shorthand `on a [AND b ...]`. Resolving by stack position rather than by + // qualifier keeps a self-join `join on f t` a real cross-scan equi-join, not f = f. + joinCondition = + bareFields.stream() + .map(f -> buildJoinConditionByFieldName(context, f)) + .reduce(context.rexBuilder::and) + .orElse(context.relBuilder.literal(true)); + } else { + joinCondition = + node.getJoinCondition() + .map(c -> rexVisitor.analyzeJoinCondition(c, context)) + .orElse(context.relBuilder.literal(true)); + } if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) { // semi and anti join only return left table outputs context.relBuilder.join( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/JoinAndLookupUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/JoinAndLookupUtils.java index dce882bc151..9ed9d58a5da 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/JoinAndLookupUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/JoinAndLookupUtils.java @@ -13,6 +13,10 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.Pair; +import org.opensearch.sql.ast.expression.And; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.QualifiedName; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.calcite.CalcitePlanContext; @@ -37,6 +41,24 @@ static JoinRelType translateJoinType(Join.JoinType joinType) { } } + /** + * Collects the names of bare single-part-field join criteria (e.g. `on a AND b` -> [a, b]). + * Returns false for anything else (qualified field, comparison, OR); {@code out} is only valid + * when this returns true, so callers must check the return before reading it. + */ + static boolean collectBareFields(UnresolvedExpression expr, List out) { + if (expr instanceof And and) { + return collectBareFields(and.getLeft(), out) && collectBareFields(and.getRight(), out); + } + if (expr instanceof Field field + && field.getField() instanceof QualifiedName qn + && qn.getParts().size() == 1) { + out.add(qn.getParts().get(0)); + return true; + } + return false; + } + /* ------For Lookup------ */ /** diff --git a/docs/user/ppl/cmd/join.md b/docs/user/ppl/cmd/join.md index 548d448b6a0..87c48d09dbe 100644 --- a/docs/user/ppl/cmd/join.md +++ b/docs/user/ppl/cmd/join.md @@ -29,6 +29,9 @@ source = table1 | left anti join left = l right = r on l.a = r.a table2 source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] source = table1 | inner join on table1.a = table2.a table2 | fields table1.a, table2.a, table1.b, table1.c source = table1 | inner join on a = c table2 | fields a, b, c, d +source = table1 | inner join on a table2 | fields a, table2.a, b, c +source = table1 | inner join on a AND b table2 | fields a, table2.a, b, table2.b +source = table1 | join on a table2 | fields a, table2.a, b, c source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields l.a, r.a source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields t1.a, t2.a source = table1 | join left = l right = r on l.a = r.a [ source = table2 ] as s | fields l.a, s.a @@ -40,7 +43,7 @@ The basic `join` syntax supports the following parameters. | Parameter | Required/Optional | Description | | --- | --- | --- | -| `` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. | +| `` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. A bare field name `f` (or `f AND g ...`) is shorthand for an equi-join on the common field(s) and keeps both key columns. | | `` | Required | The right dataset, which can be an index or a subsearch, with or without an alias. | | `joinType` | Optional | The type of join to perform. Valid values are `left`, `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. | | `left` | Optional | An alias for the left dataset (typically a subsearch) used to avoid ambiguous field names. Specify as `left = `. | @@ -71,7 +74,7 @@ The extended `join` syntax supports the following parameters. | Parameter | Required/Optional | Description | | --- | --- | --- | -| `` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. | +| `` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. A bare field name `f` (or `f AND g ...`) is shorthand for an equi-join on the common field(s); it keeps both key columns and differs from ``, which merges duplicates. | | `` | Required | The right dataset, which can be an index or a subsearch, with or without an alias. | | `type` | Optional | The join type when using extended syntax. Valid values are `left`, `outer` (same as `left`), `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. | | `` | Optional | A list of fields used to build the join criteria. These fields must exist in both datasets. If not specified, all fields common to both datasets are used as join keys. | diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java index d6d1c72f992..b6abc8fa541 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java @@ -928,6 +928,106 @@ public void testJoinWhenLegacyNotPreferred() throws IOException { }); } + @Test + public void testJoinWithImplicitField() throws IOException { + // Keep-both, so co-named columns (other than the key) resolve to the left side. + JSONObject actual = + executeQuery( + String.format( + "source=%s | inner join on name %s | fields name, age, state, country, occupation," + + " salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + verifySchema( + actual, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("occupation", "string"), + schema("salary", "int")); + verifyDataRows( + actual, + rows("Jake", 70, "California", "USA", "Engineer", 100000), + rows("Hello", 30, "New York", "USA", "Artist", 70000), + rows("John", 25, "Ontario", "Canada", "Doctor", 120000), + rows("Jane", 20, "Quebec", "Canada", "Scientist", 90000), + rows("David", 40, "Washington", "USA", "Doctor", 120000), + rows("David", 40, "Washington", "USA", "Unemployed", 0)); + + // The shorthand and the explicit qualified-equality form are output-identical. + JSONObject explicitForm = + executeQuery( + String.format( + "source=%s | inner join on %s.name = %s.name %s | fields name, age, state, country," + + " occupation, salary", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_OCCUPATION)); + assertJsonEquals(explicitForm.toString(), actual.toString()); + } + + @Test + public void testJoinNoPrefixImplicitField() throws IOException { + // No-prefix `join on name` matches the explicit qualified form, not the field-list `join name`. + JSONObject actual = + executeQuery( + String.format( + "source=%s | join on name %s | fields name, age, state, occupation, salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + JSONObject explicitForm = + executeQuery( + String.format( + "source=%s | join on %s.name = %s.name %s | fields name, age, state, occupation," + + " salary", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_OCCUPATION)); + assertJsonEquals(explicitForm.toString(), actual.toString()); + } + + @Test + public void testLeftJoinWithImplicitField() throws IOException { + // Keep-both does not coalesce, so unmatched-left rows keep the non-null left key, not a NULL. + JSONObject actual = + executeQuery( + String.format( + "source=%s | left join on name %s | fields name, age, state, occupation, salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + verifySchema( + actual, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("occupation", "string"), + schema("salary", "int")); + verifyDataRows( + actual, + rows("Jake", 70, "California", "Engineer", 100000), + rows("Hello", 30, "New York", "Artist", 70000), + rows("John", 25, "Ontario", "Doctor", 120000), + rows("Jane", 20, "Quebec", "Scientist", 90000), + rows("David", 40, "Washington", "Doctor", 120000), + rows("David", 40, "Washington", "Unemployed", 0), + rows("Jim", 27, "B.C", null, null), + rows("Peter", 57, "B.C", null, null), + rows("Rick", 70, "B.C", null, null)); + } + + @Test + public void testAliasedSelfJoinWithImplicitField() throws IOException { + // Self-join on the unique `name`: a real equi-join matches each of the 8 rows to itself (8 + // rows), not the 64-row cross product a tautology would give. + JSONObject actual = + executeQuery( + String.format( + "source=%s | inner join left=l right=r on name %s | fields name, r.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_STATE_COUNTRY)); + verifyNumOfRows(actual, 8); + verifySchema(actual, schema("name", "string"), schema("r.name", "string")); + } + @Test public void testJoinComparing() throws IOException { JSONObject actual = diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index ff46a3649a0..98f85b08282 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -729,8 +729,9 @@ sourceFilterArg // join joinCommand - : JOIN (joinOption)* (fieldList)? right = tableOrSubqueryClause - | sqlLikeJoinType? JOIN (joinOption)* sideAlias joinHintList? joinCriteria right = tableOrSubqueryClause + // Criteria alt listed first - so `join on a` reads `on` as the criteria keyword, not a field. + : sqlLikeJoinType? JOIN (joinOption)* sideAlias joinHintList? joinCriteria right = tableOrSubqueryClause + | JOIN (joinOption)* (fieldList)? right = tableOrSubqueryClause ; sqlLikeJoinType diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index d4f5eea0fb7..3741137f5a9 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -326,6 +326,8 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct if (ctx.fieldList() != null) { joinFields = Optional.of(getFieldList(ctx.fieldList())); } + // Keep a bare `on ` criteria verbatim; the planner expands it to an equi-join. Folding + // it into joinFields here would instead merge the key into one column. return new Join( projectExceptMeta(right), leftAlias, diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index b9c6dd4b093..eea4bd28cb1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -1163,4 +1163,245 @@ public void testSqlLikeJoinWithSpecificJoinType() { + "LEFT JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + // A join-type prefix or alias is required for `on`/`where` to read as the criteria keyword + // rather than as a field list. + @Test + public void testJoinWithImplicitField() { + String ppl = "source=EMP | inner join on DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testJoinWithImplicitFieldAlias() { + String ppl = "source=EMP | join left=l right=r on DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], r.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `r.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testJoinWithImplicitFieldWhereKeyword() { + String ppl = "source=EMP | inner join where DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testLeftJoinWithImplicitField() { + String ppl = "source=EMP | left join on DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "LEFT JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testSemiJoinWithImplicitField() { + String ppl = "source=EMP | semi join on DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalJoin(condition=[=($7, $8)], joinType=[semi])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE EXISTS (SELECT 1\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testJoinWithImplicitFieldMultiField() { + String ppl = "source=EMP | inner join on DEPTNO AND SAL EMP"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], EMP.EMPNO=[$8], EMP.ENAME=[$9], EMP.JOB=[$10]," + + " EMP.MGR=[$11], EMP.HIREDATE=[$12], EMP.SAL=[$13], EMP.COMM=[$14]," + + " EMP.DEPTNO=[$15])\n" + + " LogicalJoin(condition=[AND(=($7, $15), =($5, $13))], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + // Self-join joins the two scans by position, so the key is a real equi-join across them, not the + // EMP.DEPTNO=EMP.DEPTNO tautology a qualifier rewrite would collapse to. + @Test + public void testJoinWithImplicitFieldSelfJoin() { + String ppl = "source=EMP | inner join on DEPTNO EMP"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], EMP.EMPNO=[$8], EMP.ENAME=[$9], EMP.JOB=[$10]," + + " EMP.MGR=[$11], EMP.HIREDATE=[$12], EMP.SAL=[$13], EMP.COMM=[$14]," + + " EMP.DEPTNO=[$15])\n" + + " LogicalJoin(condition=[=($7, $15)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + // ENAME is not in DEPT, so the right-side lookup fails. Match the field name only; Calcite's + // full message is version-dependent. + @Test + public void testJoinWithImplicitFieldNotOnBothSides() { + String ppl = "source=EMP | inner join on ENAME DEPT"; + Throwable t = Assert.assertThrows(RuntimeException.class, () -> getRelNode(ppl)); + verifyErrorMessageContains(t, "ENAME"); + } + + @Test + public void testJoinNoPrefixImplicitField() { + String ppl = "source=EMP | join on DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testJoinNoPrefixImplicitFieldWhereKeyword() { + String ppl = "source=EMP | join where DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `EMP`.`DEPTNO`, `DEPT`.`DEPTNO` `DEPT.DEPTNO`," + + " `DEPT`.`DNAME`, `DEPT`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + // Right side is a subquery, not a table: the bare field still resolves (no underlying table name + // is needed), and the right key takes the subquery alias. + @Test + public void testJoinWithImplicitFieldAliasedSubquery() { + String ppl = "source=EMP | inner join on DEPTNO [ source=DEPT ] as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], d.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + } + + // Unaliased subquery: resolves by position with no alias and no reachable table name. + @Test + public void testJoinWithImplicitFieldUnaliasedSubquery() { + String ppl = "source=EMP | inner join on DEPTNO [ source=DEPT ]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], DEPT.DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + } + + // Aliases make the self-join meaningful: the right columns surface under `r`. + @Test + public void testJoinWithImplicitFieldAliasedSelfJoin() { + String ppl = "source=EMP | inner join left=l right=r on DEPTNO EMP"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], r.EMPNO=[$8], r.ENAME=[$9], r.JOB=[$10], r.MGR=[$11]," + + " r.HIREDATE=[$12], r.SAL=[$13], r.COMM=[$14], r.DEPTNO=[$15])\n" + + " LogicalJoin(condition=[=($7, $15)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index c5b033bfc9f..9d70487c741 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.agg; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.appendPipe; import static org.opensearch.sql.ast.dsl.AstDSL.argument; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; @@ -77,6 +78,7 @@ import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.GraphLookup; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -1852,4 +1854,89 @@ public void testUnionWithMaxoutOption() { public void testMaxoutAsFieldName() { plan("source=t | eval maxout = 1"); } + + // These assert on joinCondition/joinFields rather than full Join equality, since JoinHint has no + // equals(). + @Test + public void testJoinWithImplicitFieldKeepsBareField() { + Join shorthand = (Join) plan("source=t1 | inner join on a AND b t2"); + assertTrue(shorthand.getJoinFields().isEmpty()); + assertEquals(Optional.of(and(field("a"), field("b"))), shorthand.getJoinCondition()); + } + + @Test + public void testJoinWithSingleImplicitFieldKeepsBareField() { + Join shorthand = (Join) plan("source=t1 | inner join on a t2"); + assertTrue(shorthand.getJoinFields().isEmpty()); + assertEquals(Optional.of(field("a")), shorthand.getJoinCondition()); + } + + @Test + public void testJoinWithMultipleImplicitFieldsFlattenInOrder() { + Join join = (Join) plan("source=t1 | inner join on a AND b AND c t2"); + assertTrue(join.getJoinFields().isEmpty()); + assertEquals( + Optional.of(and(and(field("a"), field("b")), field("c"))), join.getJoinCondition()); + } + + @Test + public void testJoinWithImplicitFieldWhereKeywordKeepsBareField() { + Join join = (Join) plan("source=t1 | inner join where a t2"); + assertTrue(join.getJoinFields().isEmpty()); + assertEquals(Optional.of(field("a")), join.getJoinCondition()); + } + + @Test + public void testJoinWithQualifiedConditionIsNotRewritten() { + Join join = (Join) plan("source=t1 | inner join on l.a = r.a t2"); + assertEquals( + Optional.of(compare("=", field(qualifiedName("l", "a")), field(qualifiedName("r", "a")))), + join.getJoinCondition()); + assertTrue(join.getJoinFields().isEmpty()); + } + + // No-prefix form: the grammar reorder lets `on` read as the criteria keyword, not a field. + @Test + public void testJoinNoPrefixImplicitFieldKeepsBareField() { + Join join = (Join) plan("source=t1 | join on a t2"); + assertTrue(join.getJoinFields().isEmpty()); + assertEquals(Optional.of(field("a")), join.getJoinCondition()); + } + + @Test + public void testJoinNoPrefixImplicitFieldWhereKeepsBareField() { + Join join = (Join) plan("source=t1 | join where a t2"); + assertTrue(join.getJoinFields().isEmpty()); + assertEquals(Optional.of(field("a")), join.getJoinCondition()); + } + + @Test + public void testJoinWithImplicitFieldKeepsAliasesOnNode() { + Join join = (Join) plan("source=t1 | join left = l right = r on a t2"); + assertTrue(join.getJoinFields().isEmpty()); + assertEquals(Optional.of(field("a")), join.getJoinCondition()); + assertEquals(Optional.of("l"), join.getLeftAlias()); + assertEquals(Optional.of("r"), join.getRightAlias()); + } + + // The reorder must not change the field-list form. + @Test + public void testJoinFieldListStillParsesAsFieldList() { + Join join = (Join) plan("source=t1 | join a t2"); + assertEquals(Optional.empty(), join.getJoinCondition()); + assertEquals(Optional.of(List.of(field("a"))), join.getJoinFields()); + } + + @Test + public void testJoinNoPrefixComparisonStaysCondition() { + Join join = (Join) plan("source=t1 | join on a = b t2"); + assertTrue(join.getJoinCondition().isPresent()); + assertTrue(join.getJoinFields().isEmpty()); + } + + // A prefix with a bare field but no on/where is still a field list, so this is a syntax error. + @Test + public void testJoinPrefixWithoutCriteriaKeywordIsSyntaxError() { + assertThrows(SyntaxCheckException.class, () -> plan("source=t1 | inner join a t2")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 585575b2b24..6756cdc198a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -619,6 +619,28 @@ public void testJoinWithFieldList() { anonymize("source=t | join type=outer max=2 id1 id2 s | fields id1")); } + @Test + public void testJoinWithImplicitField() { + // The AST keeps the bare field, so the anonymized query shows `on identifier`, not a rewrite. + assertEquals( + "source=table | inner join max=*** on identifier table | fields + identifier", + anonymize("source=t | inner join on id s | fields id")); + assertEquals( + "source=table as identifier | inner join max=*** left = identifier right = identifier on" + + " identifier table as identifier | fields + identifier", + anonymize("source=t | join left = l right = r on id s | fields id")); + assertEquals( + "source=table | inner join max=*** on identifier and identifier table | fields +" + + " identifier", + anonymize("source=t | inner join on id AND uid s | fields id")); + assertEquals( + "source=table | inner join max=*** on identifier table | fields + identifier", + anonymize("source=t | inner join where id s | fields id")); + assertEquals( + "source=table | inner join max=*** on identifier table | fields + identifier", + anonymize("source=t | join on id s | fields id")); + } + @Test public void testLookup() { assertEquals(