diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java index 176d27e07d91..eb148dda25fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java @@ -25,6 +25,7 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelInput; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.TableFunctionScan; import org.apache.calcite.rel.metadata.RelColumnMapping; import org.apache.calcite.rel.type.RelDataType; @@ -34,6 +35,9 @@ public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelNode { + // Whether this is a LATERAL VIEW OUTER + private final boolean outer; + /** * @param cluster * cluster - Cluster that this relational expression belongs to @@ -49,28 +53,54 @@ public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelN * rowType - Row type produced by function * @param columnMappings * columnMappings - Column mappings associated with this function + * @param outer + * outer - true if this is a LATERAL VIEW OUTER */ protected HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List inputs, - RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings) { + RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings, + boolean outer) { super(cluster, traitSet, inputs, rexCall, elementType, rowType, columnMappings); + this.outer = outer; } public HiveTableFunctionScan(RelInput input) { super(input); + this.outer = input.getBoolean("outer", false); } public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet, List inputs, RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings) throws CalciteSemanticException { return new HiveTableFunctionScan(cluster, traitSet, inputs, rexCall, elementType, rowType, - columnMappings); + columnMappings, false); + } + + public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet, + List inputs, RexNode rexCall, Type elementType, RelDataType rowType, + Set columnMappings, boolean outer) throws CalciteSemanticException { + return new HiveTableFunctionScan(cluster, traitSet, inputs, rexCall, elementType, rowType, + columnMappings, outer); + } + + /** Returns true if this represents a LATERAL VIEW OUTER. */ + public boolean isOuter() { + return outer; + } + + @Override + public RelWriter explainTerms(RelWriter pw) { + super.explainTerms(pw); + if (outer) { + pw.item("outer", true); + } + return pw; } @Override public TableFunctionScan copy(RelTraitSet traitSet, List inputs, RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings) { return new HiveTableFunctionScan(getCluster(), traitSet, inputs, rexCall, - elementType, rowType, columnMappings); + elementType, rowType, columnMappings, outer); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 695196502ed1..f3e47f010646 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -703,14 +703,21 @@ private static QueryBlockInfo createASTLateralView(TableFunctionScan tfs, QueryB sel.add(selexpr.node()); // place the SELECT clause under the LATERAL VIEW clause - ASTBuilder lateralview = ASTBuilder.construct(HiveParser.TOK_LATERAL_VIEW, "TOK_LATERAL_VIEW"); - lateralview.add(sel.node()); + final boolean isOuterLateralView = tfs instanceof HiveTableFunctionScan htfs && htfs.isOuter(); + final int lateralViewToken = isOuterLateralView + ? HiveParser.TOK_LATERAL_VIEW_OUTER + : HiveParser.TOK_LATERAL_VIEW; + final String lateralViewText = isOuterLateralView + ? "TOK_LATERAL_VIEW_OUTER" + : "TOK_LATERAL_VIEW"; + ASTBuilder lateralView = ASTBuilder.construct(lateralViewToken, lateralViewText); + lateralView.add(sel.node()); // finally, add the LATERAL VIEW clause under the left side source which is the base table. - lateralview.add(tableFunctionSource.ast); + lateralView.add(tableFunctionSource.ast); Schema outputSchema = new Schema(tableFunctionSource.schema, new Schema(alias, lvFields)); - return new QueryBlockInfo(outputSchema, lateralview.node()); + return new QueryBlockInfo(outputSchema, lateralView.node()); } private boolean isLateralView(RelNode relNode) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d5c683daa303..f7dc75efe89e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1041,9 +1041,9 @@ boolean isCBOExecuted() { } @Override - boolean isCBOSupportedLateralView(ASTNode lateralView) { - // LATERAL VIEW OUTER not supported in CBO - return lateralView.getToken().getType() != HiveParser.TOK_LATERAL_VIEW_OUTER; + boolean isCBOSupportedLateralView() { + // Both LATERAL VIEW and LATERAL VIEW OUTER are supported in CBO. + return !this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP); } @Override @@ -2980,7 +2980,7 @@ private RelNode genJoinLogicalPlan(QB qb, ASTNode joinParseTree, Map SELECT -> SELEXPR -> FUNCTION -> function info tree ASTNode selExprAST = (ASTNode) lateralView.getChild(0).getChild(0); ASTNode functionAST = (ASTNode) selExprAST.getChild(0); @@ -118,7 +118,7 @@ public LateralViewPlan(ASTNode lateralView, RelOptCluster cluster, RelNode input this.lateralViewRel = HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster), ImmutableList.of(inputRel), udtfCall, - null, retType, createColumnMappings(inputRel)); + null, retType, createColumnMappings(inputRel), isOuter); } public static void validateLateralView(ASTNode lateralView) throws SemanticException { @@ -127,9 +127,9 @@ public static void validateLateralView(ASTNode lateralView) throws SemanticExcep " children."); } ASTNode next = (ASTNode) lateralView.getChild(1); - if (!TABLE_ALIAS_TOKEN_TYPES.contains(next.getToken().getType()) && - HiveParser.TOK_LATERAL_VIEW != next.getToken().getType()) { - throw new SemanticException(ASTErrorUtils.getMsg( + if (!TABLE_ALIAS_TOKEN_TYPES.contains(next.getToken().getType()) && + !SemanticAnalyzer.isASTNodeLateralView(next)) { + throw new SemanticException(ASTErrorUtils.getMsg( ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(), lateralView)); } } diff --git a/ql/src/test/queries/clientpositive/lateral_view_outer.q b/ql/src/test/queries/clientpositive/lateral_view_outer.q index ddb41a8fd099..80f92cebee58 100644 --- a/ql/src/test/queries/clientpositive/lateral_view_outer.q +++ b/ql/src/test/queries/clientpositive/lateral_view_outer.q @@ -13,4 +13,23 @@ create table array_valued as select key, if (key > 300, array(value, value), nul explain select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; +explain ast select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; +explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; + +-- array_valued already has a nullable array column, which can be used for the view-based test +CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a; + +-- CBO plan should contain `outer=[true]` in HiveTableFunctionScan node. +EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10; +-- Explain plan should contain `outer lateral view: true` in the UDTF Operator +EXPLAIN +SELECT key, a FROM array_valued_view limit 10; +-- Rows with null array value should still appear with a=NULL +SELECT key, a FROM array_valued_view limit 10; diff --git a/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out b/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out index c5959dce36b5..a44dddfc09e5 100644 --- a/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out +++ b/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out @@ -187,6 +187,59 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 ListSink +PREHOOK: query: explain ast +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +#### A masked pattern was here #### +POSTHOOK: query: explain ast +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +#### A masked pattern was here #### +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LATERAL_VIEW_OUTER + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_TABLE_OR_COL + value + a + TOK_TABALIAS + C + TOK_TABREF + TOK_TABNAME + array_valued + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_LIMIT + 10 + +PREHOOK: query: explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[10]) + HiveProject(array_valued.key=[$0], array_valued.value=[$1], c.a=[$6]) + HiveTableFunctionScan(invocation=[LATERAL(explode($1), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) ARRAY value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) c.a)], outer=[true]) + HiveTableScan(table=[[default, array_valued]], table:alias=[array_valued]) + PREHOOK: query: select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@array_valued @@ -205,3 +258,110 @@ POSTHOOK: Input: default@array_valued 409 ["val_409","val_409"] val_409 255 NULL NULL 278 NULL NULL +PREHOOK: query: CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@array_valued +PREHOOK: Output: database:default +PREHOOK: Output: default@array_valued_view +POSTHOOK: query: CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@array_valued +POSTHOOK: Output: database:default +POSTHOOK: Output: default@array_valued_view +POSTHOOK: Lineage: array_valued_view.a SCRIPT [(array_valued)array_valued.FieldSchema(name:value, type:array, comment:null), ] +POSTHOOK: Lineage: array_valued_view.key SIMPLE [(array_valued)array_valued.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[10]) + HiveProject(key=[$0], a=[$6]) + HiveTableFunctionScan(invocation=[LATERAL(explode($1), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) ARRAY value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) lv.a)], outer=[true]) + HiveTableScan(table=[[default, array_valued]], table:alias=[array_valued]) + +PREHOOK: query: EXPLAIN +SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: array_valued + properties: + insideView TRUE + Lateral View Forward + Select Operator + expressions: key (type: string) + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col6 + Limit + Number of rows: 10 + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + ListSink + Select Operator + expressions: value (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + outer lateral view: true + Lateral View Join Operator + outputColumnNames: _col0, _col6 + Limit + Number of rows: 10 + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +238 NULL +86 NULL +311 val_311 +311 val_311 +27 NULL +165 NULL +409 val_409 +409 val_409 +255 NULL +278 NULL