From 2ce98d58aa3151ef4c59076a4b99936f470ca3bb Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Wed, 8 Apr 2026 23:50:56 +0530 Subject: [PATCH 01/20] HIVE-29413: Avoid code duplication by updating getPartCols method for iceberg tables --- .../column/show/ShowColumnsOperation.java | 5 +- .../table/info/desc/DescTableOperation.java | 2 +- .../formatter/TextDescTableFormatter.java | 4 +- .../ddl/table/partition/PartitionUtils.java | 1 - .../hive/ql/metadata/DummyPartition.java | 4 +- .../hadoop/hive/ql/metadata/Partition.java | 3 +- .../apache/hadoop/hive/ql/metadata/Table.java | 62 ++++++++++++++----- .../ql/optimizer/ColumnPrunerProcFactory.java | 3 +- .../ql/parse/AcidExportSemanticAnalyzer.java | 3 +- .../parse/ColumnStatsAutoGatherContext.java | 3 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 3 +- .../hive/ql/parse/ImportSemanticAnalyzer.java | 2 - .../hive/ql/parse/MergeSemanticAnalyzer.java | 3 +- .../hadoop/hive/ql/parse/ParseUtils.java | 3 +- .../rewrite/CopyOnWriteMergeRewriter.java | 2 +- .../hive/ql/parse/rewrite/MergeRewriter.java | 2 +- .../ql/parse/rewrite/SplitMergeRewriter.java | 2 +- 17 files changed, 65 insertions(+), 42 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 289479b7ee79..6a2abd4ddfc2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -66,10 +66,7 @@ private List getColumnsByPattern() throws HiveException { private List getCols() throws HiveException { Table table = context.getDb().getTable(desc.getTableName()); - List allColumns = new ArrayList<>(); - allColumns.addAll(table.getCols()); - allColumns.addAll(table.getPartCols()); - return allColumns; + return new ArrayList<>(table.getAllCols()); } private Matcher getMatcher() { diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index 9086ad90c677..b662008e95ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -127,7 +127,7 @@ private Deserializer getDeserializer(Table table) throws SQLException { private void getColumnsNoColumnPath(Table table, Partition partition, List cols) throws HiveException { cols.addAll(partition == null || table.getTableType() == TableType.VIRTUAL_VIEW ? table.getCols() : partition.getCols()); - if (!desc.isFormatted()) { + if (!desc.isFormatted() && !table.hasNonNativePartitionSupport()) { cols.addAll(table.getPartCols()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index 75f39291cd07..c128948cd31b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -174,9 +174,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column List partitionColumns = null; // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation if (table.isPartitioned()) { - partitionColumns = table.hasNonNativePartitionSupport() ? 
- table.getStorageHandler().getPartitionKeys(table) : - table.getPartCols(); + partitionColumns = table.getPartCols(); } if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index db7a5dfcd3d0..5882e4616506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Map.Entry; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index c188eb09fdcf..45087c901e3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -91,9 +91,7 @@ public List getValues() { values = new ArrayList<>(); // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - for (FieldSchema fs : table.hasNonNativePartitionSupport() - ? 
table.getStorageHandler().getPartitionKeys(table) - : table.getPartCols()) { + for (FieldSchema fs : table.getPartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 736e6e8c9f1a..330f37d1ef1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -173,7 +173,8 @@ protected void initialize(Table table, // set default if location is not set and this is a physical // table partition (not a view partition) if (table.getDataLocation() != null) { - Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); + Path partPath = new Path(table.getDataLocation(), + Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 05faa38d7caf..93cdc1b605f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; @@ -119,6 +120,8 @@ public class Table implements Serializable { private transient StorageHandlerInfo storageHandlerInfo; private transient MaterializedViewMetadata materializedViewMetadata; + private List cachedPartCols; + private TableSpec 
tableSpec; private boolean materializedTable; @@ -194,6 +197,9 @@ public Table makeCopy() { newTab.setMetaTable(this.getMetaTable()); newTab.setSnapshotRef(this.getSnapshotRef()); + if (this.cachedPartCols != null) { + newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols); + } return newTab; } @@ -215,6 +221,7 @@ public org.apache.hadoop.hive.metastore.api.Table getTTable() { */ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { this.tTable = tTable; + clearCachedPartCols(); } /** @@ -249,7 +256,7 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(new ArrayList()); + t.setPartitionKeys(null); t.setParameters(new HashMap()); t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); @@ -596,14 +603,35 @@ public boolean equals(Object obj) { } public List getPartCols() { - List partKeys = tTable.getPartitionKeys(); - if (partKeys == null) { - partKeys = new ArrayList<>(); - tTable.setPartitionKeys(partKeys); + if (cachedPartCols != null) { + return cachedPartCols; + } + List partKeys; + if (isTableTypeSet() && hasNonNativePartitionSupport()) { + partKeys = getStorageHandler().getPartitionKeys(this); + } else { + partKeys = tTable.getPartitionKeys(); + if (partKeys == null) { + partKeys = new ArrayList<>(); + tTable.setPartitionKeys(partKeys); + } } + cachedPartCols = partKeys; return partKeys; } + private void clearCachedPartCols() { + cachedPartCols = null; + } + + private boolean isTableTypeSet() { + if (tTable.getParameters() == null) { + return false; + } + String tableType = tTable.getParameters().get(HiveMetaHook.TABLE_TYPE); + return tableType != null; + } + public FieldSchema getPartColByName(String colName) { return getPartCols().stream() .filter(key -> key.getName().toLowerCase().equals(colName)) @@ -611,9 +639,7 @@ public FieldSchema 
getPartColByName(String colName) { } public List getPartColNames() { - List partCols = hasNonNativePartitionSupport() ? - getStorageHandler().getPartitionKeys(this) : getPartCols(); - return partCols.stream().map(FieldSchema::getName) + return getPartCols().stream().map(FieldSchema::getName) .collect(Collectors.toList()); } @@ -762,14 +788,22 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList f_list = new ArrayList(); - f_list.addAll(getCols()); - f_list.addAll(getPartCols()); - return f_list; + List allCols = new ArrayList<>(getCols()); + Set colNames = new HashSet<>(); + for (FieldSchema col : allCols) { + colNames.add(col.getName()); + } + for (FieldSchema col : getPartCols()) { + if (!colNames.contains(col.getName())) { + allCols.add(col); + } + } + return allCols; } public void setPartCols(List partCols) { tTable.setPartitionKeys(partCols); + clearCachedPartCols(); } public String getCatName() { @@ -813,7 +847,7 @@ public void setOutputFormatClass(String name) throws HiveException { } public boolean isPartitioned() { - return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : + return hasNonNativePartitionSupport() ? 
getStorageHandler().isPartitioned(this) : CollectionUtils.isNotEmpty(getPartCols()); } @@ -1154,7 +1188,7 @@ public static void validateColumns(List columns, List } colNames.add(colName); } - if (partCols != null) { + if (partCols != null && !icebergTable) { // there is no overlap between columns and partitioning columns for (FieldSchema partCol: partCols) { String colName = normalize(partCol.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 3d3e4ce7663f..0e914843e2e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -807,8 +807,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, for (FieldNode col : cols) { int index = originalOutputColumnNames.indexOf(col.getFieldName()); Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op); - List fullFieldList = new ArrayList(tab.getCols()); - fullFieldList.addAll(tab.getPartCols()); + List fullFieldList = new ArrayList<>(tab.getAllCols()); cppCtx.getParseContext().getColumnAccessInfo() .add(tab.getCompleteName(), fullFieldList.get(index).getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java index 06912a1b3226..05f3b85f271f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java @@ -175,7 +175,8 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN //now generate insert statement //insert into newTableName select * from ts StringBuilder rewrittenQueryStr = generateExportQuery( - newTable.getPartCols(), tokRefOrNameExportTable, (ASTNode) 
tokRefOrNameExportTable.parent, newTableName); + newTable.getPartCols(), + tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr); Context rewrittenCtx = rr.rewrittenCtx; rewrittenCtx.setIsUpdateDeleteMerge(false); //it's set in parseRewrittenQuery() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 9109f9cb6086..d8c7d9527ec3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -83,7 +83,8 @@ public ColumnStatsAutoGatherContext(SemanticAnalyzer sa, HiveConf conf, this.isInsertInto = isInsertInto; this.origCtx = ctx; columns = tbl.getCols(); - partitionColumns = tbl.getPartCols(); + // current behaviour intact until we have getCols() giving only non-partition columns for non native tables as well + partitionColumns = tbl.hasNonNativePartitionSupport() ? new ArrayList<>() : tbl.getPartCols(); } public List getLoadFileWork() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index e23e54aa5230..9bf6269334e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -207,8 +207,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.hasNonNativePartitionSupport() ? 
- tbl.getStorageHandler().getPartitionKeys(tbl) : tbl.getPartitionKeys()) { + for (FieldSchema fs : tbl.getPartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 4d4956fbec13..dcf197a2c201 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.QueryState; @@ -89,7 +88,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 882840ffef5a..ac0a7e8f12be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.rewrite.MergeStatement; import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import java.util.ArrayList; import java.util.HashMap; @@ -230,7 +229,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause String deleteExtraPredicate) throws SemanticException { assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED; assert 
getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE; - Map newValuesMap = new HashMap<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + Map newValuesMap = new HashMap<>(targetTable.getAllCols().size()); ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0); //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions //before re-parsing, i.e. they are known to SemanticAnalyzer logic diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9964b9369065..5fa22d9a2f82 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -581,8 +581,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.hasNonNativePartitionSupport() ? 
- table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); + List partitionKeys = table.getPartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java index b72f2496d938..b7335473da85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java @@ -202,7 +202,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(hintStr); hintStr = null; } - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); values.addAll(sqlGenerator.getDeleteValues(Context.Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addValuesForRowLineageForCopyOnMerge(isRowLineageSupported, values, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 3ec2e580f046..c436e85a4eb6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -224,7 +224,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(" -- update clause").append("\n"); List valuesAndAcidSortKeys = new ArrayList<>( - targetTable.getCols().size() + targetTable.getPartCols().size() + 1); + targetTable.getAllCols().size() + 1); valuesAndAcidSortKeys.addAll(sqlGenerator.getSortKeys(Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), valuesAndAcidSortKeys); 
sqlGenerator.appendInsertBranch(hintStr, valuesAndAcidSortKeys); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java index 84fcf186f6b7..06edaca90f0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java @@ -58,7 +58,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau String onClauseAsString = mergeStatement.getOnClauseAsText(); sqlGenerator.append(" -- update clause (insert part)\n"); - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addRowLineageColumnsForWhenMatchedUpdateClause(isRowLineageSupported, values, targetAlias, conf); sqlGenerator.appendInsertBranch(hintStr, values); From af646e702299a36213a42991ba105cfb833a97f3 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Thu, 9 Apr 2026 15:33:03 +0530 Subject: [PATCH 02/20] commit-2 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java | 2 +- .../java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 93cdc1b605f3..440772299a2c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -256,7 +256,7 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(null); + t.setPartitionKeys(new ArrayList()); t.setParameters(new HashMap()); 
t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ffc1673f3670..3be50bde1499 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2998,6 +2998,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc final NotNullConstraint nnc = tabMetaData.getNotNullConstraint(); final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo(); + Set alreadyAdded = new HashSet<>(); for (StructField structField : fields) { colName = structField.getFieldName(); colInfo = new ColumnInfo( @@ -3006,6 +3007,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc isNullable(colName, nnc, pkc), tableAlias, false); colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName)); rr.put(tableAlias, colName, colInfo); + alreadyAdded.add(colName); cInfoLst.add(colInfo); } // TODO: Fix this @@ -3015,6 +3017,9 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc // 3.2 Add column info corresponding to partition columns for (FieldSchema part_col : tabMetaData.getPartCols()) { colName = part_col.getName(); + if (alreadyAdded.contains(colName)) { + continue; + } colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), isNullable(colName, nnc, pkc), tableAlias, true); From d5f101c19602aba8f6c72ca2bd4833dff27135c1 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Fri, 10 Apr 2026 01:14:09 +0530 Subject: [PATCH 03/20] corrected bucket-map-join test --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java | 6 +++--- .../org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 440772299a2c..008603f98a97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -633,9 +633,9 @@ private boolean isTableTypeSet() { } public FieldSchema getPartColByName(String colName) { - return getPartCols().stream() - .filter(key -> key.getName().toLowerCase().equals(colName)) - .findFirst().orElse(null); + return hasNonNativePartitionSupport() ? null : getPartCols().stream() + .filter(key -> key.getName().toLowerCase().equals(colName)) + .findFirst().orElse(null); } public List getPartColNames() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f8ec90287202..4dae0653e0ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12003,6 +12003,8 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { // Determine row schema for TSOP. // Include column names from SerDe, the partition and virtual columns. rwsch = new RowResolver(); + Set partCols = tab.hasNonNativePartitionSupport() ? + Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); try { // Including parameters passed in the query if (properties != null) { @@ -12020,8 +12022,6 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { deserializer.handleJobLevelConfiguration(conf); List fields = rowObjectInspector .getAllStructFieldRefs(); - Set partCols = tab.hasNonNativePartitionSupport() ? - Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); for (int i = 0; i < fields.size(); i++) { /** * if the column is a skewed column, use ColumnInfo accordingly @@ -12041,6 +12041,9 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { // Hack!! 
- refactor once the metadata APIs with types are ready // Finally add the partitioning columns for (FieldSchema part_col : tab.getPartCols()) { + if(partCols.contains(part_col.getName())){ + break; + } LOG.trace("Adding partition col: " + part_col); rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); From b24899674bdd8fb9d57d7252506942663125ce6a Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Fri, 10 Apr 2026 11:34:14 +0530 Subject: [PATCH 04/20] corrected update statements --- .../hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/rewrite/MergeRewriter.java | 6 ++++-- .../hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java | 2 +- .../ql/parse/rewrite/sql/MultiInsertSqlGenerator.java | 8 +++++++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 101f6b1fc3d8..8e52f63c6611 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -120,7 +120,7 @@ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) thr // Make sure this isn't one of the partitioning columns, that's not supported. 
for (FieldSchema fschema : targetTable.getPartCols()) { - if (fschema.getName().equalsIgnoreCase(columnName)) { + if (fschema.getName().equalsIgnoreCase(columnName) && !targetTable.hasNonNativePartitionSupport()) { throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index c436e85a4eb6..c9ba50d110f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -250,8 +250,10 @@ protected void addValues(Table targetTable, String targetAlias, Map values.add( - formatter.apply(fieldSchema.getName()))); + if (!targetTable.hasNonNativePartitionSupport()) { + targetTable.getPartCols().forEach(fieldSchema -> values.add( + formatter.apply(fieldSchema.getName()))); + } } protected String getRhsExpValue(String newValue, String alias) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java index d14ddc7eb485..838d1d8a09a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java @@ -98,7 +98,7 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB insertValues.add(sqlGenerator.qualify(identifier)); } - if (updateBlock.getTargetTable().getPartCols() != null) { + if (!updateBlock.getTargetTable().hasNonNativePartitionSupport()) { updateBlock.getTargetTable().getPartCols().forEach( fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java index 7587daf13055..6576dd28d2ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java @@ -111,6 +111,9 @@ public void appendPartitionColsOfTarget() { */ public void appendPartitionCols(Table table) { // If the table is partitioned we have to put the partition() clause in + if (table.hasNonNativePartitionSupport()) { + return; + } List partCols = table.getPartCols(); if (partCols == null || partCols.isEmpty()) { return; @@ -148,7 +151,10 @@ public void removeLastChar() { } public void appendPartColsOfTargetTableWithComma(String alias) { - if (targetTable.getPartCols() == null || targetTable.getPartCols().isEmpty()) { + if (targetTable.hasNonNativePartitionSupport()) { + return; + } + if (targetTable.getPartCols().isEmpty()) { return; } queryStr.append(','); From 70567f893745a9f57633963b684fd4a0fab249ee Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Sat, 11 Apr 2026 20:13:07 +0530 Subject: [PATCH 05/20] corrected load, partition evolution tests --- .../org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java | 3 +++ .../java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java | 4 ++-- .../apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java index 17a964a44583..ae7696c3e536 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java @@ -75,6 +75,9 @@ public static boolean isSchemaEvolutionEnabled(Table table, Configuration conf) } public static boolean isFullPartitionSpec(Table table, Map partitionSpec) { + if 
(table.hasNonNativePartitionSupport()) { + return true; + } for (FieldSchema partitionCol : table.getPartCols()) { if (partitionSpec.get(partitionCol.getName()) == null) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 3be50bde1499..9d4371e296b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3017,8 +3017,8 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc // 3.2 Add column info corresponding to partition columns for (FieldSchema part_col : tabMetaData.getPartCols()) { colName = part_col.getName(); - if (alreadyAdded.contains(colName)) { - continue; + if (tabMetaData.hasNonNativePartitionSupport()) { + break; } colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index eb4a73f1e5e9..6aa5b08fd5f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -23,6 +23,7 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -511,7 +512,7 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc // Partition spec was already validated by caller when create TableSpec object. // So, need not validate inpPartSpec here. - List parts = table.getPartCols(); + List parts = table.hasNonNativePartitionSupport() ? 
Collections.emptyList() : table.getPartCols(); if (tableTree.getChildCount() >= 2) { ASTNode partSpecNode = (ASTNode) tableTree.getChild(1); inpPartSpec = new HashMap<>(partSpecNode.getChildCount()); @@ -561,7 +562,7 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc } rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0)))); - addPartitionColsToInsert(table.getPartCols(), inpPartSpec, rewrittenQueryStr); + addPartitionColsToInsert(parts, inpPartSpec, rewrittenQueryStr); rewrittenQueryStr.append(" select * from "); rewrittenQueryStr.append(tempTblName); From 44d22f89df1985d625992dae457925f0b7087227 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Sun, 12 Apr 2026 01:54:59 +0530 Subject: [PATCH 06/20] refactored # Conflicts: # ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java --- .../mapreduce/TestHCatMultiOutputFormat.java | 2 +- .../hive/ql/ddl/table/AlterTableUtils.java | 3 -- .../create/like/CreateTableLikeOperation.java | 2 +- .../table/info/desc/DescTableOperation.java | 2 +- .../formatter/TextDescTableFormatter.java | 2 +- .../JsonShowTableStatusFormatter.java | 2 +- .../TextShowTableStatusFormatter.java | 2 +- .../ddl/table/partition/PartitionUtils.java | 2 +- .../AlterTableExchangePartitionAnalyzer.java | 4 +- .../partition/show/ShowPartitionAnalyzer.java | 6 +-- .../archive/AlterTableArchiveOperation.java | 2 +- .../archive/AlterTableArchiveUtils.java | 2 +- .../archive/AlterTableUnarchiveOperation.java | 2 +- .../create/AbstractCreateViewAnalyzer.java | 2 +- .../hadoop/hive/ql/exec/ArchiveUtils.java | 6 +-- .../hadoop/hive/ql/exec/DDLPlanUtils.java | 5 +-- .../apache/hadoop/hive/ql/exec/MoveTask.java | 2 +- .../apache/hadoop/hive/ql/exec/Utilities.java | 2 +- .../hive/ql/exec/repl/ReplLoadTask.java | 2 +- .../hive/ql/metadata/DummyPartition.java | 2 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 16 +++---- .../HiveMaterializedViewsRegistry.java | 3 +-
.../hadoop/hive/ql/metadata/Partition.java | 8 ++-- .../apache/hadoop/hive/ql/metadata/Table.java | 44 +++++++++---------- .../hive/ql/optimizer/GenMapRedUtils.java | 2 +- .../ql/optimizer/ppr/PartitionPruner.java | 2 +- .../ql/parse/AcidExportSemanticAnalyzer.java | 2 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 --- .../parse/ColumnStatsAutoGatherContext.java | 2 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 4 +- .../hive/ql/parse/ImportSemanticAnalyzer.java | 2 +- .../hive/ql/parse/LoadSemanticAnalyzer.java | 3 +- .../hive/ql/parse/MergeSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/ParseUtils.java | 4 +- .../ql/parse/RewriteSemanticAnalyzer.java | 2 +- .../hive/ql/parse/SemanticAnalyzer.java | 11 ++--- .../hive/ql/parse/rewrite/MergeRewriter.java | 8 ++-- .../ql/parse/rewrite/SplitUpdateRewriter.java | 2 +- .../rewrite/sql/MultiInsertSqlGenerator.java | 10 +---- .../NativeAcidMultiInsertSqlGenerator.java | 6 +-- .../hive/ql/stats/ColStatsProcessor.java | 2 +- 42 files changed, 85 insertions(+), 111 deletions(-) diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index d87158b23fae..05e5495d2656 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -374,7 +374,7 @@ private List getTableData(String table, String database) throws Exceptio Hive hive = Hive.get(conf); org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); FetchWork work; - if (!tbl.getPartCols().isEmpty()) { + if (!tbl.getEffectivePartCols().isEmpty()) { List partitions = hive.getPartitions(tbl); List partDesc = new ArrayList(); List partLocs = new ArrayList(); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java index ae7696c3e536..17a964a44583 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java @@ -75,9 +75,6 @@ public static boolean isSchemaEvolutionEnabled(Table table, Configuration conf) } public static boolean isFullPartitionSpec(Table table, Map partitionSpec) { - if (table.hasNonNativePartitionSupport()) { - return true; - } for (FieldSchema partitionCol : table.getPartCols()) { if (partitionSpec.get(partitionCol.getName()) == null) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java index 770724b90abf..e10d4bdb00ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java @@ -100,7 +100,7 @@ private Table createViewLikeTable(Table oldTable) throws HiveException { setUserSpecifiedLocation(table); table.setFields(oldTable.getCols()); - table.setPartCols(oldTable.getPartCols()); + table.setPartCols(oldTable.getEffectivePartCols()); if (desc.getDefaultSerdeProps() != null) { for (Map.Entry e : desc.getDefaultSerdeProps().entrySet()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index b662008e95ed..9086ad90c677 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -127,7 +127,7 @@ private Deserializer getDeserializer(Table table) throws SQLException { private void 
getColumnsNoColumnPath(Table table, Partition partition, List cols) throws HiveException { cols.addAll(partition == null || table.getTableType() == TableType.VIRTUAL_VIEW ? table.getCols() : partition.getCols()); - if (!desc.isFormatted() && !table.hasNonNativePartitionSupport()) { + if (!desc.isFormatted()) { cols.addAll(table.getPartCols()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index c128948cd31b..2ecce54e088d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -174,7 +174,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column List partitionColumns = null; // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation if (table.isPartitioned()) { - partitionColumns = table.getPartCols(); + partitionColumns = table.getEffectivePartCols(); } if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java index 073db26e756c..405417916990 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java @@ -69,7 +69,7 @@ private Map makeOneTableStatus(Table table, Hive db, HiveConf co builder.put("partitioned", table.isPartitioned()); if (table.isPartitioned()) { - builder.put("partitionColumns", 
JsonDescTableFormatter.createColumnsInfo(table.getPartCols(), + builder.put("partitionColumns", JsonDescTableFormatter.createColumnsInfo(table.getEffectivePartCols(), Collections.emptyList())); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java index 552dc310465b..33205bebcbea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java @@ -73,7 +73,7 @@ private void writeStorageInfo(DataOutputStream out, Partition partition, Table t private void writeColumnsInfo(DataOutputStream out, Table table) throws IOException, UnsupportedEncodingException { String columns = MetaStoreUtils.getDDLFromFieldSchema("columns", table.getCols()); String partitionColumns = table.isPartitioned() ? 
- MetaStoreUtils.getDDLFromFieldSchema("partition_columns", table.getPartCols()) : ""; + MetaStoreUtils.getDDLFromFieldSchema("partition_columns", table.getEffectivePartCols()) : ""; out.write(Utilities.newLineCode); out.write(("columns:" + columns).getBytes(StandardCharsets.UTF_8)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index 5882e4616506..0011b25df358 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -149,7 +149,7 @@ public static List getPartitionsWithSpecs(Hive db, Table table, GetPa } private static String tablePartitionColNames(Table table) { - List partCols = table.getPartCols(); + List partCols = table.getEffectivePartCols(); return String.join("/", partCols.toString()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java index 6485627c7e6e..91c8da2f74ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java @@ -84,7 +84,7 @@ protected void analyzeCommand(TableName tableName, Map partition if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) { throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg()); } - List sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs); + List sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getEffectivePartCols(), partitionSpecs); // check if source partition exists 
GetPartitionsFilterSpec sourcePartitionsFilterSpec = new GetPartitionsFilterSpec(); @@ -106,7 +106,7 @@ protected void analyzeCommand(TableName tableName, Map partition throw new SemanticException(ErrorMsg.PARTITION_VALUE_NOT_CONTINUOUS.getMsg(partitionSpecs.toString())); } - List destProjectFilters = MetaStoreUtils.getPvals(destTable.getPartCols(), partitionSpecs); + List destProjectFilters = MetaStoreUtils.getPvals(destTable.getEffectivePartCols(), partitionSpecs); // check if dest partition exists GetPartitionsFilterSpec getDestPartitionsFilterSpec = new GetPartitionsFilterSpec(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java index c0bffcebdb23..4a7aea0490f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java @@ -102,7 +102,7 @@ ExprNodeDesc getShowPartitionsFilter(Table table, ASTNode command) throws Semant if (astChild.getType() == HiveParser.TOK_WHERE) { RowResolver rwsch = new RowResolver(); Map colTypes = new HashMap(); - for (FieldSchema fs : table.getPartCols()) { + for (FieldSchema fs : table.getEffectivePartCols()) { rwsch.put(table.getTableName(), fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.stringTypeInfo, null, true)); colTypes.put(fs.getName().toLowerCase(), fs.getType()); @@ -202,8 +202,8 @@ private String getShowPartitionsOrder(Table table, ASTNode command) throws Seman if (astChild.getType() == HiveParser.TOK_ORDERBY) { Map poses = new HashMap(); RowResolver rwsch = new RowResolver(); - for (int i = 0; i < table.getPartCols().size(); i++) { - FieldSchema fs = table.getPartCols().get(i); + for (int i = 0; i < table.getEffectivePartCols().size(); i++) { + FieldSchema fs = table.getEffectivePartCols().get(i); rwsch.put(table.getTableName(), 
fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()), null, true)); poses.put(fs.getName().toLowerCase(), i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java index e218e590a24e..67f5b7946760 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java @@ -129,7 +129,7 @@ private Path getOriginalDir(Table table, PartSpecInfo partitionSpecInfo, List partitionColumns) throws SemanticException { - if (oldView.getPartCols().isEmpty() || oldView.getPartCols().equals(partitionColumns)) { + if (oldView.getEffectivePartCols().isEmpty() || oldView.getEffectivePartCols().equals(partitionColumns)) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index ebe8f2f52775..f25cadd40073 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -74,7 +74,7 @@ static public PartSpecInfo create(Table tbl, Map partSpec) // ARCHIVE PARTITION(hr='13') won't List prefixFields = new ArrayList(); List prefixValues = new ArrayList(); - List partCols = tbl.getPartCols(); + List partCols = tbl.getEffectivePartCols(); Iterator itrPsKeys = partSpec.keySet().iterator(); for (FieldSchema fs : partCols) { if (!itrPsKeys.hasNext()) { @@ -222,7 +222,7 @@ public static int getArchivingLevel(Partition p) throws HiveException { * @throws HiveException */ public static String getPartialName(Partition p, int level) throws HiveException { - List fields = p.getTable().getPartCols().subList(0, level); + List fields = p.getTable().getEffectivePartCols().subList(0, level); List values = 
p.getValues().subList(0, level); try { return Warehouse.makePartName(fields, values); @@ -273,7 +273,7 @@ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, Map spec = new HashMap(partSpec); List reversedKeys = new ArrayList(); - for (FieldSchema fs : tbl.getPartCols()) { + for (FieldSchema fs : tbl.getEffectivePartCols()) { if (spec.containsKey(fs.getName())) { reversedKeys.add(fs.getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index a5bc66733f46..3fe753de5e08 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; @@ -294,7 +293,7 @@ public String getPartitionActualName(Partition pt) { */ private Map getPartitionColumnToPrimitiveCategory(Partition pt) { Map resultMap = new HashMap<>(); - for (FieldSchema schema: pt.getTable().getPartCols()) { + for (FieldSchema schema: pt.getTable().getEffectivePartCols()) { resultMap.put( schema.getName(), ((PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(schema.getType())).getPrimitiveCategory() @@ -976,7 +975,7 @@ private String getComment(Table table) { } private String getPartitionsForView(Table table) { - List partitionKeys = table.getPartCols(); + List partitionKeys = table.getEffectivePartCols(); if (partitionKeys.isEmpty()) { return ""; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 
3eca5531f127..a5ec3a023092 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -633,7 +633,7 @@ public void logMessage(LoadTableDesc tbd) { private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, TaskInformation ti) throws HiveException, IOException, InvalidOperationException { - List partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec()); + List partVals = MetaStoreUtils.getPvals(table.getEffectivePartCols(), tbd.getPartitionSpec()); db.validatePartitionNameCharacters(partVals); if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("loadPartition called from " + tbd.getSourcePath() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index a29e532b113b..9cd67fcb9619 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -4321,7 +4321,7 @@ public static void setPartitionColumnNames(Configuration conf, TableScanOperator if (metadata == null) { return; } - List partCols = metadata.getPartCols(); + List partCols = metadata.getEffectivePartCols(); if (partCols != null && !partCols.isEmpty()) { conf.set(serdeConstants.LIST_PARTITION_COLUMNS, MetaStoreUtils.getColumnNamesFromFieldSchema(partCols)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java index 9287fd75e766..a7829d7b78bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java @@ -548,7 +548,7 @@ public static Task createViewTask(MetaData metaData, String dbNameToLoadIn, H } CreateViewDesc desc = new CreateViewDesc(dbDotView, table.getCols(), null, table.getParameters(), - table.getPartColNames(), false, 
false, viewOriginalText, viewExpandedText, table.getPartCols()); + table.getPartColNames(), false, false, viewOriginalText, viewExpandedText, table.getEffectivePartCols()); desc.setReplicationSpec(metaData.getReplicationSpec()); desc.setOwnerName(table.getOwner()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index 45087c901e3f..7af16770c059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -91,7 +91,7 @@ public List getValues() { values = new ArrayList<>(); // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - for (FieldSchema fs : table.getPartCols()) { + for (FieldSchema fs : table.getEffectivePartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index cd33896807bc..0c583264816b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -869,7 +869,7 @@ public void createTable(String tableName, List columns, List par FieldSchema part = new FieldSchema(); part.setName(partCol); part.setType(STRING_TYPE_NAME); // default partition key - tbl.getPartCols().add(part); + tbl.getEffectivePartCols().add(part); } } tbl.setSerializationLib(LazySimpleSerDe.class.getName()); @@ -1246,8 +1246,8 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio throws HiveException { try { Map newPartSpec = newPart.getSpec(); - if (oldPartSpec.keySet().size() != tbl.getPartCols().size() - || newPartSpec.keySet().size() != tbl.getPartCols().size()) { + if (oldPartSpec.keySet().size() != tbl.getEffectivePartCols().size() + || newPartSpec.keySet().size() != tbl.getEffectivePartCols().size()) { throw new HiveException("Unable to 
rename partition to the same name: number of partition cols don't match. "); } if (!oldPartSpec.keySet().equals(newPartSpec.keySet())){ @@ -1255,7 +1255,7 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio } List pvals = new ArrayList(); - for (FieldSchema field : tbl.getPartCols()) { + for (FieldSchema field : tbl.getEffectivePartCols()) { String val = oldPartSpec.get(field.getName()); if (val == null || val.length() == 0) { throw new HiveException("get partition: Value for key " @@ -3832,7 +3832,7 @@ public Partition getPartition(Table tbl, Map partSpec, boolean forceCreate, String partPath, boolean inheritTableSpecs) throws HiveException { tbl.validatePartColumnNames(partSpec, true); List pvals = new ArrayList(); - for (FieldSchema field : tbl.getPartCols()) { + for (FieldSchema field : tbl.getEffectivePartCols()) { String val = partSpec.get(field.getName()); // enable dynamic partitioning if ((val == null && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMIC_PARTITIONING)) @@ -4221,7 +4221,7 @@ public List getPartitionNames(Table tbl, Map partSpec, s if (tbl.hasNonNativePartitionSupport()) { return tbl.getStorageHandler().getPartitionNames(tbl, partSpec); } - List pvals = MetaStoreUtils.getPvals(tbl.getPartCols(), partSpec); + List pvals = MetaStoreUtils.getPvals(tbl.getEffectivePartCols(), partSpec); return getPartitionNamesByPartitionVals(tbl, pvals, max); } @@ -4463,7 +4463,7 @@ private List getPartitionsWithAuth(Table tbl, Map par throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); } - List partialPvals = MetaStoreUtils.getPvals(tbl.getPartCols(), partialPartSpec); + List partialPvals = MetaStoreUtils.getPvals(tbl.getEffectivePartCols(), partialPartSpec); List partitions = null; try { @@ -4772,7 +4772,7 @@ static List convertFromPartSpec(Iterator iterator, Tab || partitionWithoutSD.getRelativePath().isEmpty()) { if (tbl.getDataLocation() != null) { Path partPath = new Path(tbl.getDataLocation(), - 
Warehouse.makePartName(tbl.getPartCols(), + Warehouse.makePartName(tbl.getEffectivePartCols(), partitionWithoutSD.getValues())); partitionLocation = partPath.toString(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 0cf02a95e392..b763c379bdc7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -430,7 +429,7 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable // 1.2 Add column info corresponding to partition columns ArrayList partitionColumns = new ArrayList(); - for (FieldSchema part_col : viewTable.getPartCols()) { + for (FieldSchema part_col : viewTable.getEffectivePartCols()) { colName = part_col.getName(); colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 330f37d1ef1c..d284e0231a03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -125,7 +125,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { List pvals = new ArrayList(); - for (FieldSchema field : tbl.getPartCols()) { + for (FieldSchema field : tbl.getEffectivePartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) 
{ throw new HiveException("partition spec is invalid; field " @@ -174,7 +174,7 @@ protected void initialize(Table table, // table partition (not a view partition) if (table.getDataLocation() != null) { Path partPath = new Path(table.getDataLocation(), - Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); + Warehouse.makePartName(table.getEffectivePartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } } @@ -201,7 +201,7 @@ protected void initialize(Table table, public String getName() { try { - return Warehouse.makePartName(table.getPartCols(), tPartition.getValues()); + return Warehouse.makePartName(table.getEffectivePartCols(), tPartition.getValues()); } catch (MetaException e) { throw new RuntimeException(e); } @@ -544,7 +544,7 @@ public void setLocation(String location) { public void setValues(Map partSpec) throws HiveException { List pvals = new ArrayList(); - for (FieldSchema field : table.getPartCols()) { + for (FieldSchema field : table.getEffectivePartCols()) { String val = partSpec.get(field.getName()); if (val == null) { throw new HiveException( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 008603f98a97..13f075aebafc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -603,21 +603,24 @@ public boolean equals(Object obj) { } public List getPartCols() { + List partKeys = tTable.getPartitionKeys(); + if (partKeys == null) { + partKeys = new ArrayList<>(); + tTable.setPartitionKeys(partKeys); + } + return partKeys; + } + + public List getEffectivePartCols() { if (cachedPartCols != null) { return cachedPartCols; } - List partKeys; if (isTableTypeSet() && hasNonNativePartitionSupport()) { - partKeys = getStorageHandler().getPartitionKeys(this); + cachedPartCols = getStorageHandler().getPartitionKeys(this); } else { - partKeys = 
tTable.getPartitionKeys(); - if (partKeys == null) { - partKeys = new ArrayList<>(); - tTable.setPartitionKeys(partKeys); - } + cachedPartCols = getPartCols(); } - cachedPartCols = partKeys; - return partKeys; + return cachedPartCols; } private void clearCachedPartCols() { @@ -633,13 +636,13 @@ private boolean isTableTypeSet() { } public FieldSchema getPartColByName(String colName) { - return hasNonNativePartitionSupport() ? null : getPartCols().stream() + return getPartCols().stream() .filter(key -> key.getName().toLowerCase().equals(colName)) .findFirst().orElse(null); } public List getPartColNames() { - return getPartCols().stream().map(FieldSchema::getName) + return getEffectivePartCols().stream().map(FieldSchema::getName) .collect(Collectors.toList()); } @@ -788,16 +791,9 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - List allCols = new ArrayList<>(getCols()); - Set colNames = new HashSet<>(); - for (FieldSchema col : allCols) { - colNames.add(col.getName()); - } - for (FieldSchema col : getPartCols()) { - if (!colNames.contains(col.getName())) { - allCols.add(col); - } - } + ArrayList allCols = new ArrayList<>(); + allCols.addAll(getCols()); + allCols.addAll(getPartCols()); return allCols; } @@ -848,7 +844,7 @@ public void setOutputFormatClass(String name) throws HiveException { public boolean isPartitioned() { return hasNonNativePartitionSupport() ? 
getStorageHandler().isPartitioned(this) : - CollectionUtils.isNotEmpty(getPartCols()); + CollectionUtils.isNotEmpty(getEffectivePartCols()); } public void setFields(List fields) { @@ -1046,7 +1042,7 @@ public boolean isMaterializedView() { public LinkedHashMap createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { - List fsl = getPartCols(); + List fsl = getEffectivePartCols(); List tpl = tp.getValues(); LinkedHashMap spec = new LinkedHashMap(fsl.size()); for (int i = 0; i < fsl.size(); i++) { @@ -1188,7 +1184,7 @@ public static void validateColumns(List columns, List } colNames.add(colName); } - if (partCols != null && !icebergTable) { + if (partCols != null) { // there is no overlap between columns and partitioning columns for (FieldSchema partCol: partCols) { String colName = normalize(partCol.getName()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index bd1be003a512..c560783f070f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -2149,7 +2149,7 @@ static void usePartitionColumns(Properties properties, Table table, List if (properties.containsKey(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS)) { usePartitionColumns(properties, partColNames); } else { - List partCols = table.getPartCols(); + List partCols = table.getEffectivePartCols(); String partNames = partCols.stream().map(FieldSchema::getName).collect(Collectors.joining("/")); String partTypes = partCols.stream().map(FieldSchema::getType).collect(Collectors.joining(":")); properties.setProperty( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 7574ad5f6d24..26fba8533ecc 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -521,7 +521,7 @@ static private boolean pruneBySequentialScan(Table tab, List partitio } private static List extractPartColTypes(Table tab) { - List pCols = tab.getPartCols(); + List pCols = tab.getEffectivePartCols(); List partColTypeInfos = new ArrayList<>(pCols.size()); for (FieldSchema pCol : pCols) { partColTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(pCol.getType())); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java index 05f3b85f271f..c42f39fa6309 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java @@ -175,7 +175,7 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN //now generate insert statement //insert into newTableName select * from ts StringBuilder rewrittenQueryStr = generateExportQuery( - newTable.getPartCols(), + newTable.getEffectivePartCols(), tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr); Context rewrittenCtx = rr.rewrittenCtx; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index f616049d8591..211462775911 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1202,7 +1202,7 @@ public TableSpec(Table tableHandle, List partitions) { if (partitions != null && !partitions.isEmpty()) { this.specType = SpecType.STATIC_PARTITION; this.partitions = partitions; - List partCols = this.tableHandle.getPartCols(); 
+ List partCols = this.tableHandle.getEffectivePartCols(); this.partSpec = new LinkedHashMap<>(); for (FieldSchema partCol : partCols) { partSpec.put(partCol.getName(), null); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 9d4371e296b3..ffc1673f3670 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -2998,7 +2998,6 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc final NotNullConstraint nnc = tabMetaData.getNotNullConstraint(); final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo(); - Set alreadyAdded = new HashSet<>(); for (StructField structField : fields) { colName = structField.getFieldName(); colInfo = new ColumnInfo( @@ -3007,7 +3006,6 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc isNullable(colName, nnc, pkc), tableAlias, false); colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName)); rr.put(tableAlias, colName, colInfo); - alreadyAdded.add(colName); cInfoLst.add(colInfo); } // TODO: Fix this @@ -3017,9 +3015,6 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc // 3.2 Add column info corresponding to partition columns for (FieldSchema part_col : tabMetaData.getPartCols()) { colName = part_col.getName(); - if (tabMetaData.hasNonNativePartitionSupport()) { - break; - } colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), isNullable(colName, nnc, pkc), tableAlias, true); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index d8c7d9527ec3..b3e25950985e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -84,7 +84,7 @@ public ColumnStatsAutoGatherContext(SemanticAnalyzer sa, HiveConf conf, this.origCtx = ctx; columns = tbl.getCols(); // current behaviour intact until we have getCols() giving only non-partition columns for non native tables as well - partitionColumns = tbl.hasNonNativePartitionSupport() ? new ArrayList<>() : tbl.getPartCols(); + partitionColumns = tbl.getPartCols(); } public List getLoadFileWork() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 9bf6269334e7..5c9f898bea13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -207,7 +207,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.getPartCols()) { + for (FieldSchema fs : tbl.getEffectivePartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } @@ -309,7 +309,7 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas, if (isPartitionStats) { if (partTransformSpec == null) { - for (FieldSchema fs : tbl.getPartCols()) { + for (FieldSchema fs : tbl.getEffectivePartCols()) { String identifier = unparseIdentifier(fs.getName(), conf); rewrittenQueryBuilder.append(", ").append(identifier); columnNamesBuilder.append(", ").append(identifier); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index dcf197a2c201..e671ad24d7b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -876,7 
+876,7 @@ private static void checkTable(Table table, ImportTableDesc tableDesc, } { // check partitioning column order and types - List existingTablePartCols = table.getPartCols(); + List existingTablePartCols = table.getEffectivePartCols(); List importedTablePartCols = tableDesc.getPartCols(); if (!EximUtil.schemaCompare(importedTablePartCols, existingTablePartCols)) { throw new SemanticException( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index 6aa5b08fd5f4..9093570706f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -23,7 +23,6 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -512,7 +511,7 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc // Partition spec was already validated by caller when create TableSpec object. // So, need not validate inpPartSpec here. - List parts = table.hasNonNativePartitionSupport() ? 
Collections.emptyList() : table.getPartCols(); + List parts = table.getPartCols(); if (tableTree.getChildCount() >= 2) { ASTNode partSpecNode = (ASTNode) tableTree.getChild(1); inpPartSpec = new HashMap<>(partSpecNode.getChildCount()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index ac0a7e8f12be..353ad9e2b574 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -431,7 +431,7 @@ private static final class OnClauseAnalyzer { HiveConf conf, String onClauseAsString) { this.onClause = onClause; allTargetTableColumns.addAll(targetTable.getCols()); - allTargetTableColumns.addAll(targetTable.getPartCols()); + allTargetTableColumns.addAll(targetTable.getEffectivePartCols()); this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery); this.conf = conf; this.onClauseAsString = onClauseAsString; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 5fa22d9a2f82..27bde7acfe2d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -581,7 +581,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.getPartCols(); + List partitionKeys = table.getEffectivePartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } @@ -691,7 +691,7 @@ public static Map> getFullPartitionSpecs( */ private static int calculatePartPrefix(Table tbl, Set partSpecKeys) { int partPrefixToDrop = 0; - 
for (FieldSchema fs : tbl.getPartCols()) { + for (FieldSchema fs : tbl.getEffectivePartCols()) { if (!partSpecKeys.contains(fs.getName())) { break; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 8e52f63c6611..101f6b1fc3d8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -120,7 +120,7 @@ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) thr // Make sure this isn't one of the partitioning columns, that's not supported. for (FieldSchema fschema : targetTable.getPartCols()) { - if (fschema.getName().equalsIgnoreCase(columnName) && !targetTable.hasNonNativePartitionSupport()) { + if (fschema.getName().equalsIgnoreCase(columnName)) { throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 4dae0653e0ee..915ff21bd912 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2210,7 +2210,7 @@ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase } } else { // partition spec is not specified but column schema can have partitions specified - for(FieldSchema f : targetTable.getPartCols()) { + for(FieldSchema f : targetTable.getEffectivePartCols()) { //parser only allows foo(a,b), not foo(foo.a, foo.b) targetColumns.remove(f.getName()); } @@ -12003,8 +12003,6 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { // Determine row schema for TSOP. // Include column names from SerDe, the partition and virtual columns. 
rwsch = new RowResolver(); - Set partCols = tab.hasNonNativePartitionSupport() ? - Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); try { // Including parameters passed in the query if (properties != null) { @@ -12022,6 +12020,8 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { deserializer.handleJobLevelConfiguration(conf); List fields = rowObjectInspector .getAllStructFieldRefs(); + Set partCols = tab.hasNonNativePartitionSupport() ? + Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); for (int i = 0; i < fields.size(); i++) { /** * if the column is a skewed column, use ColumnInfo accordingly @@ -12041,9 +12041,6 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns for (FieldSchema part_col : tab.getPartCols()) { - if(partCols.contains(part_col.getName())){ - break; - } LOG.trace("Adding partition col: " + part_col); rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); @@ -12309,7 +12306,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String if (tab.isPartitioned() && !tab.hasNonNativePartitionSupport()) { List cols = new ArrayList(); if (qbp.getAnalyzeRewrite() != null) { - List partitionCols = tab.getPartCols(); + List partitionCols = tab.getEffectivePartCols(); for (FieldSchema fs : partitionCols) { cols.add(fs.getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index c9ba50d110f8..cb759a11d080 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -249,11 +249,9 @@ protected void addValues(Table targetTable, String 
targetAlias, Map values.add( - formatter.apply(fieldSchema.getName()))); - } + + targetTable.getPartCols().forEach(fieldSchema -> values.add( + formatter.apply(fieldSchema.getName()))); } protected String getRhsExpValue(String newValue, String alias) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java index 838d1d8a09a6..d14ddc7eb485 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java @@ -98,7 +98,7 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB insertValues.add(sqlGenerator.qualify(identifier)); } - if (!updateBlock.getTargetTable().hasNonNativePartitionSupport()) { + if (updateBlock.getTargetTable().getPartCols() != null) { updateBlock.getTargetTable().getPartCols().forEach( fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java index 6576dd28d2ec..928564012985 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java @@ -111,9 +111,6 @@ public void appendPartitionColsOfTarget() { */ public void appendPartitionCols(Table table) { // If the table is partitioned we have to put the partition() clause in - if (table.hasNonNativePartitionSupport()) { - return; - } List partCols = table.getPartCols(); if (partCols == null || partCols.isEmpty()) { return; @@ -151,14 +148,11 @@ public void removeLastChar() { } public void appendPartColsOfTargetTableWithComma(String alias) { - if (targetTable.hasNonNativePartitionSupport()) { - 
return; - } - if (targetTable.getPartCols().isEmpty()) { + if (targetTable.getPartCols() == null || targetTable.getPartCols().isEmpty()) { return; } queryStr.append(','); - appendCols(targetTable.getPartCols(), alias, null, FieldSchema::getName); + appendCols(targetTable.getEffectivePartCols(), alias, null, FieldSchema::getName); } public void appendAllColsOfTargetTable(String prefix) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java index 87e426800442..94cda746396b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java @@ -36,7 +36,7 @@ public NativeAcidMultiInsertSqlGenerator(Table table, String targetTableFullName @Override public void appendAcidSelectColumns(Operation operation) { queryStr.append("ROW__ID,"); - for (FieldSchema fieldSchema : targetTable.getPartCols()) { + for (FieldSchema fieldSchema : targetTable.getEffectivePartCols()) { String identifier = HiveUtils.unparseIdentifier(fieldSchema.getName(), this.conf); queryStr.append(identifier); queryStr.append(","); @@ -45,9 +45,9 @@ public void appendAcidSelectColumns(Operation operation) { @Override public List getDeleteValues(Operation operation) { - List deleteValues = new ArrayList<>(1 + targetTable.getPartCols().size()); + List deleteValues = new ArrayList<>(1 + targetTable.getEffectivePartCols().size()); deleteValues.add(qualify("ROW__ID")); - for (FieldSchema fieldSchema : targetTable.getPartCols()) { + for (FieldSchema fieldSchema : targetTable.getEffectivePartCols()) { deleteValues.add(qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf))); } return deleteValues; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index be62d94019ed..cb257ee216f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -156,7 +156,7 @@ private boolean constructColumnStatsFromPackedRows(Table tbl, List Date: Sun, 12 Apr 2026 21:24:51 +0530 Subject: [PATCH 07/20] addressed sonar issues # Conflicts: # ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java --- .../llap/io/api/impl/LlapInputFormat.java | 3 +- .../hadoop/hive/llap/ProactiveEviction.java | 32 ++++----- .../column/show/ShowColumnsOperation.java | 8 +-- .../formatter/TextDescTableFormatter.java | 34 ++++------ .../hadoop/hive/ql/exec/ArchiveUtils.java | 3 +- .../ql/exec/vector/VectorizedRowBatchCtx.java | 3 +- .../hive/ql/metadata/DummyPartition.java | 1 - .../HiveMaterializedViewsRegistry.java | 6 +- .../hadoop/hive/ql/metadata/Partition.java | 21 +++--- .../apache/hadoop/hive/ql/metadata/Table.java | 66 +++++++++---------- .../ql/optimizer/physical/Vectorizer.java | 2 +- .../ql/optimizer/ppr/PartExprEvalUtils.java | 9 ++- .../parse/ColumnStatsAutoGatherContext.java | 1 - .../hive/ql/parse/MergeSemanticAnalyzer.java | 30 ++++----- .../hadoop/hive/ql/parse/ParseUtils.java | 26 +++----- .../hive/ql/parse/SemanticAnalyzer.java | 8 +-- .../hadoop/hive/ql/plan/PartitionDesc.java | 8 +-- .../hadoop/hive/common/io/CacheTag.java | 2 +- 18 files changed, 112 insertions(+), 151 deletions(-) diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index e74885e57a3d..67e59155b67c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -31,7 +31,6 @@ import java.util.Collections; import java.util.HashMap; import 
java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -232,7 +231,7 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept if (paths.hasNext()) { PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next()); if (partDesc != null) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && !partSpec.isEmpty()) { partitionColumnCount = partSpec.size(); } diff --git a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java index 120949fc9949..f4a008257bb8 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java @@ -158,13 +158,13 @@ public static final class Request { // Holds a hierarchical structure of DBs, tables and partitions such as: // { testdb : { testtab0 : [], testtab1 : [ {pk0 : p0v0, pk1 : p0v1}, {pk0 : p1v0, pk1 : p1v1} ] }, testdb2 : {} } - private final Map>>> entities; + private final Map>>> entities; - private Request(Map>>> entities) { + private Request(Map>>> entities) { this.entities = entities; } - public Map>>> getEntities() { + public Map>>> getEntities() { return entities; } @@ -191,21 +191,21 @@ public List toProtoRequests() List protoRequests = new LinkedList<>(); - for (Map.Entry>>> dbEntry : entities.entrySet()) { + for (Map.Entry>>> dbEntry : entities.entrySet()) { String dbName = dbEntry.getKey(); - Map>> tables = dbEntry.getValue(); + Map>> tables = dbEntry.getValue(); LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = null; requestBuilder.setDbName(dbName.toLowerCase()); - for (Map.Entry>> tableEntry : tables.entrySet()) { + for (Map.Entry>> 
tableEntry : tables.entrySet()) { String tableName = tableEntry.getKey(); tableBuilder = LlapDaemonProtocolProtos.TableProto.newBuilder(); tableBuilder.setTableName(tableName.toLowerCase()); - Set> partitions = tableEntry.getValue(); + Set> partitions = tableEntry.getValue(); Set partitionKeys = null; for (Map partitionSpec : partitions) { @@ -245,7 +245,7 @@ public boolean isTagMatch(CacheTag cacheTag) { return false; } - Map>> tables = entities.get(db); + Map>> tables = entities.get(db); // If true, must be a drop DB event and this cacheTag matches. if (tables.isEmpty()) { @@ -261,7 +261,7 @@ public boolean isTagMatch(CacheTag cacheTag) { for (String tableAndDbName : tables.keySet()) { if (tableAndDbName.equals(tagTableName.getNotEmptyDbTable())) { - Set> partDescs = tables.get(tableAndDbName); + Set> partDescs = tables.get(tableAndDbName); // If true, must be a drop table event, and this cacheTag matches. if (partDescs == null) { @@ -292,7 +292,7 @@ public String toString() { */ public static final class Builder { - private final Map>>> entities; + private final Map>>> entities; private Builder() { this.entities = new HashMap<>(); @@ -302,7 +302,7 @@ public static Builder create() { return new Builder(); } - public Builder addPartitionOfATable(String db, String tableName, LinkedHashMap partSpec) { + public Builder addPartitionOfATable(String db, String tableName, Map partSpec) { ensureDb(db); ensureTable(db, tableName); entities.get(db).get(tableName).add(partSpec); @@ -325,7 +325,7 @@ public Request build() { } private void ensureDb(String dbName) { - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); if (tables == null) { tables = new HashMap<>(); entities.put(dbName, tables); @@ -334,9 +334,9 @@ private void ensureDb(String dbName) { private void ensureTable(String dbName, String tableName) { ensureDb(dbName); - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); - Set> partitions = tables.get(tableName); + 
Set> partitions = tables.get(tableName); if (partitions == null) { partitions = new HashSet<>(); tables.put(tableName, partitions); @@ -352,7 +352,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entities.clear(); String dbName = protoRequest.getDbName().toLowerCase(); - Map>> entitiesInDb = new HashMap<>(); + Map>> entitiesInDb = new HashMap<>(); List tables = protoRequest.getTableList(); if (tables != null && !tables.isEmpty()) { @@ -364,7 +364,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entitiesInDb.put(dbAndTableName, null); continue; } - Set> partitions = new HashSet<>(); + Set> partitions = new HashSet<>(); LinkedHashMap partDesc = new LinkedHashMap<>(); for (int valIx = 0; valIx < table.getPartValCount(); ++valIx) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 6a2abd4ddfc2..87d115df72ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -91,13 +91,7 @@ private List filterColumns(List columns, Matcher match } if (desc.isSorted()) { - result.sort( - new Comparator() { - @Override - public int compare(FieldSchema f1, FieldSchema f2) { - return f1.getName().compareTo(f2.getName()); - } - }); + result.sort(Comparator.comparing(FieldSchema::getName)); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index 2ecce54e088d..651b40c6f37d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -59,7 +59,6 @@ import java.io.DataOutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -70,7 +69,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.Map.Entry; -import java.util.stream.Collectors; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; import static org.apache.hadoop.hive.ql.ddl.ShowUtils.ALIGNMENT; @@ -171,11 +169,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column boolean isFormatted, boolean isOutputPadded) throws IOException { String partitionData = ""; if (columnPath == null) { - List partitionColumns = null; - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - if (table.isPartitioned()) { - partitionColumns = table.getEffectivePartCols(); - } + List partitionColumns = table.isPartitioned() ? table.getEffectivePartCols() : null; if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { TextMetaDataTable metaDataTable = new TextMetaDataTable(); @@ -202,13 +196,9 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column } private void addFormattedTableData(DataOutputStream out, Table table, Partition partition, boolean isOutputPadded) - throws IOException, UnsupportedEncodingException { - String formattedTableInfo = null; - if (partition != null) { - formattedTableInfo = getPartitionInformation(table, partition); - } else { - formattedTableInfo = getTableInformation(table, isOutputPadded); - } + throws IOException { + String formattedTableInfo = (partition != null) ? 
getPartitionInformation(table, partition) : + getTableInformation(table, isOutputPadded); if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { formattedTableInfo += getConstraintsInformation(table); @@ -335,7 +325,7 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List skewedCoumnNames = storageDesc.getSkewedInfo().getSkewedColNames().stream() .sorted() - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Columns:", skewedCoumnNames.toString(), tableInfo); } @@ -343,16 +333,16 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List> skewedColumnValues = storageDesc.getSkewedInfo().getSkewedColValues().stream() .sorted(new VectorComparator()) - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Values:", skewedColumnValues.toString(), tableInfo); } - Map, String> skewedColMap = new TreeMap<>(new VectorComparator()); + Map, String> skewedColMap = new TreeMap<>(new VectorComparator<>()); skewedColMap.putAll(storageDesc.getSkewedInfo().getSkewedColValueLocationMaps()); if (MapUtils.isNotEmpty(skewedColMap)) { formatOutput("Skewed Value to Path:", skewedColMap.toString(), tableInfo); Map, String> truncatedSkewedColMap = - new TreeMap, String>(new VectorComparator()); + new TreeMap<>(new VectorComparator<>()); // walk through existing map to truncate path so that test won't mask it then we can verify location is right Set, String>> entries = skewedColMap.entrySet(); for (Entry, String> entry : entries) { @@ -401,7 +391,7 @@ private void getPartitionMetaDataInformation(StringBuilder tableInfo, Partition } } - private class VectorComparator> implements Comparator>{ + private static final class VectorComparator> implements Comparator> { @Override public int compare(List listA, List listB) { for (int i = 0; i < listA.size() && i < listB.size(); i++) { @@ -436,7 +426,7 @@ private void displayAllParameters(Map params, StringBuilder tabl private void 
displayAllParameters(Map params, StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) { - List keys = new ArrayList(params.keySet()); + List keys = new ArrayList<>(params.keySet()); Collections.sort(keys); for (String key : keys) { String value = params.get(key); @@ -624,7 +614,7 @@ private void addExtendedTableData(DataOutputStream out, Table table, Partition p } private void addExtendedConstraintData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { out.write(("Constraints").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.tabCode); @@ -656,7 +646,7 @@ private void addExtendedConstraintData(DataOutputStream out, Table table) } private void addExtendedStorageData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getStorageHandlerInfo() != null) { out.write(("StorageHandlerInfo").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.newLineCode); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index f25cadd40073..df2a92202bcc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -254,7 +253,7 @@ public static String getPartialName(Partition p, int level) throws HiveException * @throws HiveException */ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, - LinkedHashMap partSpec) + Map partSpec) throws HiveException { List partKeys = tbl.getPartitionKeys(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index a0906cfb0339..38c4dfb036b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.Map; import java.util.stream.IntStream; @@ -287,7 +286,7 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, Object[] partitionValues) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); for (int i = 0; i < vrbCtx.partitionColumnCount; i++) { Object objectValue; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index 7af16770c059..782b4f6e5258 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -90,7 +90,6 @@ public List getValues() { Table table = this.getTable(); values = new ArrayList<>(); - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation for (FieldSchema fs : table.getEffectivePartCols()) { String val = partSpec.get(fs.getName()); values.add(val); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index b763c379bdc7..2453a0c64358 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -429,10 +429,10 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable // 1.2 Add column info corresponding to partition 
columns ArrayList partitionColumns = new ArrayList(); - for (FieldSchema part_col : viewTable.getEffectivePartCols()) { - colName = part_col.getName(); + for (FieldSchema partCol : viewTable.getEffectivePartCols()) { + colName = partCol.getName(); colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true); + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), null, true); rr.put(null, colName, colInfo); cInfoLst.add(colInfo); partitionColumns.add(colInfo); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index d284e0231a03..2e24dcd0087b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -124,7 +123,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : tbl.getEffectivePartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) { @@ -417,7 +416,7 @@ public Path[] getPath(Sample s) throws HiveException { } int scount = s.getSampleFraction(); - ArrayList ret = new ArrayList(); + List ret = new ArrayList<>(); if (bcount == scount) { ret.add(getBucketPath(s.getSampleNum() - 1)); @@ -429,7 +428,7 @@ public Path[] getPath(Sample s) throws HiveException { } // undersampling a bucket ret.add(getBucketPath((s.getSampleNum() - 1) % bcount)); - } else if (bcount > scount) { + } else { if ((bcount / scount) * scount != bcount) { throw new HiveException("Sample 
Count" + scount + " is not a divisor of bucket count " + bcount + " for table " @@ -440,11 +439,11 @@ public Path[] getPath(Sample s) throws HiveException { ret.add(getBucketPath(i * scount + (s.getSampleNum() - 1))); } } - return (ret.toArray(new Path[ret.size()])); + return (ret.toArray(new Path[0])); } } - public LinkedHashMap getSpec() { + public Map getSpec() { return table.createSpec(tPartition); } @@ -543,7 +542,7 @@ public void setLocation(String location) { */ public void setValues(Map partSpec) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : table.getEffectivePartCols()) { String val = partSpec.get(field.getName()); if (val == null) { @@ -583,12 +582,11 @@ public List getSkewedColNames() { return tPartition.getSd().getSkewedInfo().getSkewedColNames(); } - public void setSkewedValueLocationMap(List valList, String dirName) - throws HiveException { + public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tPartition.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tPartition.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -613,8 +611,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { - if (obj instanceof Partition) { - Partition o = (Partition) obj; + if (obj instanceof Partition o) { return Objects.equals(tPartition, o.tPartition); } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 13f075aebafc..3fe4df59676f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata; import java.io.IOException; +import java.io.Serial; import java.io.Serializable; import java.util.ArrayList; 
import java.util.Arrays; @@ -31,6 +32,7 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; + import java.util.stream.Collectors; import java.util.stream.Stream; @@ -48,6 +50,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.api.SourceTable; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -58,7 +61,6 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; @@ -102,6 +104,7 @@ */ public class Table implements Serializable { + @Serial private static final long serialVersionUID = 1L; static final private Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Table"); @@ -111,6 +114,7 @@ public class Table implements Serializable { /** * These fields are all cached fields. The information comes from tTable. 
*/ + private List cachedPartCols; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -120,8 +124,6 @@ public class Table implements Serializable { private transient StorageHandlerInfo storageHandlerInfo; private transient MaterializedViewMetadata materializedViewMetadata; - private List cachedPartCols; - private TableSpec tableSpec; private boolean materializedTable; @@ -221,7 +223,6 @@ public org.apache.hadoop.hive.metastore.api.Table getTTable() { */ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { this.tTable = tTable; - clearCachedPartCols(); } /** @@ -233,11 +234,11 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { { sd.setSerdeInfo(new SerDeInfo()); sd.setNumBuckets(-1); - sd.setBucketCols(new ArrayList()); - sd.setCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.setSortCols(new ArrayList()); - sd.getSerdeInfo().setParameters(new HashMap()); + sd.setBucketCols(new ArrayList<>()); + sd.setCols(new ArrayList<>()); + sd.setParameters(new HashMap<>()); + sd.setSortCols(new ArrayList<>()); + sd.getSerdeInfo().setParameters(new HashMap<>()); // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does // not support a table with no columns. 
sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); @@ -247,17 +248,17 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { sd.setInputFormat(SequenceFileInputFormat.class.getName()); sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName()); SkewedInfo skewInfo = new SkewedInfo(); - skewInfo.setSkewedColNames(new ArrayList()); - skewInfo.setSkewedColValues(new ArrayList>()); - skewInfo.setSkewedColValueLocationMaps(new HashMap, String>()); + skewInfo.setSkewedColNames(new ArrayList<>()); + skewInfo.setSkewedColValues(new ArrayList<>()); + skewInfo.setSkewedColValueLocationMaps(new HashMap<>()); sd.setSkewedInfo(skewInfo); } org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(new ArrayList()); - t.setParameters(new HashMap()); + t.setPartitionKeys(new ArrayList<>()); + t.setParameters(new HashMap<>()); t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); t.setTableName(tableName); @@ -410,7 +411,7 @@ public void setStorageHandlerInfo(StorageHandlerInfo storageHandlerInfo) { this.storageHandlerInfo = storageHandlerInfo; } - final public Class getInputFormatClass() { + public final Class getInputFormatClass() { if (inputFormatClass == null) { try { String className = tTable.getSd().getInputFormat(); @@ -430,7 +431,7 @@ final public Class getInputFormatClass() { return inputFormatClass; } - final public Class getOutputFormatClass() { + public final Class getOutputFormatClass() { if (outputFormatClass == null) { try { String className = tTable.getSd().getOutputFormat(); @@ -464,7 +465,7 @@ public void setMaterializedTable(boolean materializedTable) { * Marker SemanticException, so that processing that allows for table validation failures * and appropriately handles them can recover from these types of SemanticExceptions */ - public class ValidationFailureSemanticException extends SemanticException{ + 
public static class ValidationFailureSemanticException extends SemanticException{ public ValidationFailureSemanticException(String s) { super(s); } @@ -534,9 +535,9 @@ public TableType getTableType() { return Enum.valueOf(TableType.class, tTable.getTableType()); } - public ArrayList getFields() { + public List getFields() { - ArrayList fields = new ArrayList(); + List fields = new ArrayList<>(); try { Deserializer decoder = getDeserializer(); @@ -611,6 +612,10 @@ public List getPartCols() { return partKeys; } + /** + * Returns partition columns, consulting the storage handler for non-native tables (e.g. Iceberg) + * where partition columns are not stored in the metastore. + */ public List getEffectivePartCols() { if (cachedPartCols != null) { return cachedPartCols; @@ -623,10 +628,6 @@ public List getEffectivePartCols() { return cachedPartCols; } - private void clearCachedPartCols() { - cachedPartCols = null; - } - private boolean isTableTypeSet() { if (tTable.getParameters() == null) { return false; @@ -642,8 +643,7 @@ public FieldSchema getPartColByName(String colName) { } public List getPartColNames() { - return getEffectivePartCols().stream().map(FieldSchema::getName) - .collect(Collectors.toList()); + return getEffectivePartCols().stream().map(FieldSchema::getName).toList(); } public boolean hasNonNativePartitionSupport() { @@ -701,7 +701,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tTable.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tTable.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -711,7 +711,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { public Map, String> getSkewedColValueLocationMaps() { return (tTable.getSd().getSkewedInfo() != null) ? 
tTable.getSd().getSkewedInfo() - .getSkewedColValueLocationMaps() : new HashMap, String>(); + .getSkewedColValueLocationMaps() : new HashMap<>(); } public void setSkewedColValues(List> skewedValues) { @@ -720,7 +720,7 @@ public void setSkewedColValues(List> skewedValues) { public List> getSkewedColValues(){ return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValues() : new ArrayList>(); + .getSkewedColValues() : new ArrayList<>(); } public void setSkewedColNames(List skewedColNames) { @@ -729,7 +729,7 @@ public void setSkewedColNames(List skewedColNames) { public List getSkewedColNames() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColNames() : new ArrayList(); + .getSkewedColNames() : new ArrayList<>(); } public SkewedInfo getSkewedInfo() { @@ -791,15 +791,13 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList allCols = new ArrayList<>(); - allCols.addAll(getCols()); + ArrayList allCols = new ArrayList<>(getCols()); allCols.addAll(getPartCols()); return allCols; } public void setPartCols(List partCols) { tTable.setPartitionKeys(partCols); - clearCachedPartCols(); } public String getCatName() { @@ -1039,12 +1037,12 @@ public boolean isMaterializedView() { * Use the information from this partition. * @return Partition name to value mapping. 
*/ - public LinkedHashMap createSpec( + public Map createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { List fsl = getEffectivePartCols(); List tpl = tp.getValues(); - LinkedHashMap spec = new LinkedHashMap(fsl.size()); + Map spec = LinkedHashMap.newLinkedHashMap(fsl.size()); for (int i = 0; i < fsl.size(); i++) { FieldSchema fs = fsl.get(i); String value = tpl.get(i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f4b4c2ff3bad..82f81861a4dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1767,7 +1767,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma * allColumnNameList and allTypeInfoList variables -- into the data and partition columns. */ - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); dataColumnCount = dataAndPartColumnCount - partitionColumnCount; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index 91340b1b76ef..dd4478e0acc9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -51,7 +50,7 @@ public class PartExprEvalUtils { * @throws HiveException */ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws HiveException { - LinkedHashMap partSpec = p.getSpec(); + Map partSpec = p.getSpec(); Properties partProps = 
p.getSchema(); String[] partKeyTypes; @@ -59,8 +58,8 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv if (!partSpec.keySet().containsAll(expr.getCols())) { return null; } - partKeyTypes = p.getTable().getStorageHandler().getPartitionKeys(p.getTable()).stream() - .map(FieldSchema::getType).toArray(String[]::new); + partKeyTypes = p.getTable().getEffectivePartCols().stream().map(FieldSchema::getType) + .toArray(String[]::new); } else { String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); partKeyTypes = pcolTypes.trim().split(":"); @@ -104,7 +103,7 @@ public static Pair prepareExpr( ExprNodeDesc expr, List partColumnNames, List partColumnTypeInfos) throws HiveException { // Create the row object - List partObjectInspectors = new ArrayList(); + List partObjectInspectors = new ArrayList<>(); for (int i = 0; i < partColumnNames.size(); i++) { partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( partColumnTypeInfos.get(i))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index b3e25950985e..9109f9cb6086 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -83,7 +83,6 @@ public ColumnStatsAutoGatherContext(SemanticAnalyzer sa, HiveConf conf, this.isInsertInto = isInsertInto; this.origCtx = ctx; columns = tbl.getCols(); - // current behaviour intact until we have getCols() giving only non-partition columns for non native tables as well partitionColumns = tbl.getPartCols(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 353ad9e2b574..6f32b0a9293a 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -229,7 +229,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause String deleteExtraPredicate) throws SemanticException { assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED; assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE; - Map newValuesMap = new HashMap<>(targetTable.getAllCols().size()); + Map newValuesMap = HashMap.newHashMap(targetTable.getAllCols().size()); ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0); //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions //before re-parsing, i.e. they are known to SemanticAnalyzer logic @@ -302,7 +302,7 @@ private List findWhenClauses(ASTNode tree, int start) throws SemanticEx "Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause); whenClauses.add(whenClause); } - if (whenClauses.size() <= 0) { + if (whenClauses.isEmpty()) { //Futureproofing: the parser will actually not allow this throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement"); } @@ -498,11 +498,7 @@ private void handleUnresolvedColumns() { private void addColumn2Table(String tableName, String columnName) { tableName = tableName.toLowerCase(); //normalize name for mapping tableNamesFound.add(tableName); - List cols = table2column.get(tableName); - if (cols == null) { - cols = new ArrayList<>(); - table2column.put(tableName, cols); - } + List cols = table2column.computeIfAbsent(tableName, k -> new ArrayList<>()); //we want to preserve 'columnName' as it was in original input query so that rewrite //looks as much as possible like original query cols.add(columnName); @@ -525,7 +521,7 @@ private String getPredicate() { } StringBuilder sb = new StringBuilder(); for (String col : 
targetCols) { - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(" AND "); } //but preserve table name in SQL @@ -604,17 +600,15 @@ protected String getMatchedText(ASTNode n) { } protected boolean isAliased(ASTNode n) { - switch (n.getType()) { - case HiveParser.TOK_TABREF: - return findTabRefIdxs(n)[0] != 0; - case HiveParser.TOK_TABNAME: - return false; - case HiveParser.TOK_SUBQUERY: + return switch (n.getType()) { + case HiveParser.TOK_TABREF -> findTabRefIdxs(n)[0] != 0; + case HiveParser.TOK_TABNAME -> false; + case HiveParser.TOK_SUBQUERY -> { assert n.getChildCount() > 1 : "Expected Derived Table to be aliased"; - return true; - default: - throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); - } + yield true; + } + default -> throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); + }; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 27bde7acfe2d..0904e2c9b5c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -123,15 +123,11 @@ public static ASTNode parse( * @return boolean */ public static boolean isJoinToken(ASTNode node) { - switch (node.getToken().getType()) { - case HiveParser.TOK_JOIN: - case HiveParser.TOK_LEFTOUTERJOIN: - case HiveParser.TOK_RIGHTOUTERJOIN: - case HiveParser.TOK_FULLOUTERJOIN: - return true; - default: - return false; - } + return switch (node.getToken().getType()) { + case HiveParser.TOK_JOIN, HiveParser.TOK_LEFTOUTERJOIN, HiveParser.TOK_RIGHTOUTERJOIN, + HiveParser.TOK_FULLOUTERJOIN -> true; + default -> false; + }; } /** @@ -163,12 +159,10 @@ public static List validateColumnNameUniqueness( // but it should not be a major bottleneck as the number of columns are // anyway not so big Iterator iterCols = fieldSchemas.iterator(); - List colNames = new ArrayList(); + List colNames = new ArrayList<>(); while (iterCols.hasNext()) { String colName = 
iterCols.next().getName(); - Iterator iter = colNames.iterator(); - while (iter.hasNext()) { - String oldColName = iter.next(); + for (String oldColName : colNames) { if (colName.equalsIgnoreCase(oldColName)) { throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES .getMsg(oldColName)); @@ -286,7 +280,7 @@ public static Pair containsTokenOfType(ASTNode root, Integer .. final Set tokensToMatch = new HashSet<>(Arrays.asList(tokens)); final String[] matched = {null}; - boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate() { + boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate<>() { @Override public boolean apply(ASTNode node) { if (tokensToMatch.contains(node.getType())) { @@ -302,7 +296,7 @@ public boolean apply(ASTNode node) { } public static boolean containsTokenOfType(ASTNode root, PTFUtils.Predicate predicate) { - Queue queue = new ArrayDeque(); + Queue queue = new ArrayDeque<>(); // BFS queue.add(root); @@ -535,7 +529,7 @@ public static String getKeywords(Set excludes) { if (excludes != null && excludes.contains(name)) { continue; } - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(","); } sb.append(name); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 915ff21bd912..e1ca15a76b5e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12040,10 +12040,10 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // Hack!! 
- refactor once the metadata APIs with types are ready // Finally add the partitioning columns - for (FieldSchema part_col : tab.getPartCols()) { - LOG.trace("Adding partition col: " + part_col); - rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); + for (FieldSchema partCol : tab.getPartCols()) { + LOG.trace("Adding partition col: " + partCol); + rwsch.put(alias, partCol.getName(), new ColumnInfo(partCol.getName(), + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), alias, true)); } // put virtual columns into RowResolver. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 0dcfe72d7f5b..b5b4662a1491 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -57,7 +57,7 @@ public class PartitionDesc implements Serializable, Cloneable { private static final Interner> CLASS_INTERNER = Interners.newWeakInterner(); private TableDesc tableDesc; - private LinkedHashMap partSpec; + private Map partSpec; private Class inputFileFormatClass; private Class outputFileFormatClass; private Properties properties; @@ -73,7 +73,7 @@ public void setBaseFileName(String baseFileName) { public PartitionDesc() { } - public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { + public PartitionDesc(final TableDesc table, final Map partSpec) { this.tableDesc = table; setPartSpec(partSpec); } @@ -138,11 +138,11 @@ public void setTableDesc(TableDesc tableDesc) { } @Explain(displayName = "partition values", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public LinkedHashMap getPartSpec() { + public Map getPartSpec() { return partSpec; } - public void setPartSpec(final LinkedHashMap partSpec) { + public void setPartSpec(final Map partSpec) { 
StringInternUtils.internValuesInMap(partSpec); this.partSpec = partSpec; } diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java index 0f5d7b915168..f81f8e9ec816 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java @@ -82,7 +82,7 @@ public static final CacheTag build(String tableName) { return new TableCacheTag(tableName); } - public static final CacheTag build(String tableName, LinkedHashMap partDescMap) { + public static final CacheTag build(String tableName, Map partDescMap) { if (StringUtils.isEmpty(tableName) || partDescMap == null || partDescMap.isEmpty()) { throw new IllegalArgumentException(); } From 1dca65285749eac2399c9622d66dbfe90df50ad3 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Sun, 26 Apr 2026 15:07:04 +0530 Subject: [PATCH 08/20] updated table api and usage --- .../mapreduce/TestHCatMultiOutputFormat.java | 2 +- .../create/like/CreateTableLikeOperation.java | 2 +- .../formatter/TextDescTableFormatter.java | 2 +- .../JsonShowTableStatusFormatter.java | 2 +- .../TextShowTableStatusFormatter.java | 2 +- .../ddl/table/partition/PartitionUtils.java | 2 +- .../AlterTableExchangePartitionAnalyzer.java | 4 +-- .../partition/show/ShowPartitionAnalyzer.java | 6 ++--- .../archive/AlterTableArchiveOperation.java | 2 +- .../archive/AlterTableArchiveUtils.java | 2 +- .../archive/AlterTableUnarchiveOperation.java | 2 +- .../create/AbstractCreateViewAnalyzer.java | 2 +- .../hadoop/hive/ql/exec/ArchiveUtils.java | 6 ++--- .../hadoop/hive/ql/exec/DDLPlanUtils.java | 4 +-- .../apache/hadoop/hive/ql/exec/MoveTask.java | 2 +- .../apache/hadoop/hive/ql/exec/Utilities.java | 2 +- .../hive/ql/exec/repl/ReplLoadTask.java | 2 +- .../hive/ql/metadata/DummyPartition.java | 2 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 16 ++++++------ 
.../HiveMaterializedViewsRegistry.java | 2 +- .../hadoop/hive/ql/metadata/Partition.java | 8 +++--- .../apache/hadoop/hive/ql/metadata/Table.java | 25 +++++++++++++------ .../hive/ql/optimizer/GenMapRedUtils.java | 2 +- .../ql/optimizer/ppr/PartExprEvalUtils.java | 2 +- .../ql/optimizer/ppr/PartitionPruner.java | 2 +- .../ql/parse/AcidExportSemanticAnalyzer.java | 2 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 2 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 4 +-- .../hive/ql/parse/ImportSemanticAnalyzer.java | 2 +- .../hive/ql/parse/MergeSemanticAnalyzer.java | 2 +- .../hadoop/hive/ql/parse/ParseUtils.java | 4 +-- .../hive/ql/parse/SemanticAnalyzer.java | 4 +-- .../rewrite/sql/MultiInsertSqlGenerator.java | 2 +- .../NativeAcidMultiInsertSqlGenerator.java | 6 ++--- .../hive/ql/stats/ColStatsProcessor.java | 2 +- 35 files changed, 73 insertions(+), 62 deletions(-) diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index 05e5495d2656..d87158b23fae 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -374,7 +374,7 @@ private List getTableData(String table, String database) throws Exceptio Hive hive = Hive.get(conf); org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); FetchWork work; - if (!tbl.getEffectivePartCols().isEmpty()) { + if (!tbl.getPartCols().isEmpty()) { List partitions = hive.getPartitions(tbl); List partDesc = new ArrayList(); List partLocs = new ArrayList(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java index e10d4bdb00ce..770724b90abf 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java @@ -100,7 +100,7 @@ private Table createViewLikeTable(Table oldTable) throws HiveException { setUserSpecifiedLocation(table); table.setFields(oldTable.getCols()); - table.setPartCols(oldTable.getEffectivePartCols()); + table.setPartCols(oldTable.getPartCols()); if (desc.getDefaultSerdeProps() != null) { for (Map.Entry e : desc.getDefaultSerdeProps().entrySet()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index 651b40c6f37d..dc6c3680ca1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -169,7 +169,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column boolean isFormatted, boolean isOutputPadded) throws IOException { String partitionData = ""; if (columnPath == null) { - List partitionColumns = table.isPartitioned() ? table.getEffectivePartCols() : null; + List partitionColumns = table.isPartitioned() ? 
table.getPartCols() : null; if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { TextMetaDataTable metaDataTable = new TextMetaDataTable(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java index 405417916990..073db26e756c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/JsonShowTableStatusFormatter.java @@ -69,7 +69,7 @@ private Map makeOneTableStatus(Table table, Hive db, HiveConf co builder.put("partitioned", table.isPartitioned()); if (table.isPartitioned()) { - builder.put("partitionColumns", JsonDescTableFormatter.createColumnsInfo(table.getEffectivePartCols(), + builder.put("partitionColumns", JsonDescTableFormatter.createColumnsInfo(table.getPartCols(), Collections.emptyList())); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java index 33205bebcbea..552dc310465b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/TextShowTableStatusFormatter.java @@ -73,7 +73,7 @@ private void writeStorageInfo(DataOutputStream out, Partition partition, Table t private void writeColumnsInfo(DataOutputStream out, Table table) throws IOException, UnsupportedEncodingException { String columns = MetaStoreUtils.getDDLFromFieldSchema("columns", table.getCols()); String partitionColumns = table.isPartitioned() ? 
- MetaStoreUtils.getDDLFromFieldSchema("partition_columns", table.getEffectivePartCols()) : ""; + MetaStoreUtils.getDDLFromFieldSchema("partition_columns", table.getPartCols()) : ""; out.write(Utilities.newLineCode); out.write(("columns:" + columns).getBytes(StandardCharsets.UTF_8)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index 0011b25df358..5882e4616506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -149,7 +149,7 @@ public static List getPartitionsWithSpecs(Hive db, Table table, GetPa } private static String tablePartitionColNames(Table table) { - List partCols = table.getEffectivePartCols(); + List partCols = table.getPartCols(); return String.join("/", partCols.toString()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java index 91c8da2f74ba..6485627c7e6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java @@ -84,7 +84,7 @@ protected void analyzeCommand(TableName tableName, Map partition if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) { throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg()); } - List sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getEffectivePartCols(), partitionSpecs); + List sourceProjectFilters = MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs); // check if source partition exists 
GetPartitionsFilterSpec sourcePartitionsFilterSpec = new GetPartitionsFilterSpec(); @@ -106,7 +106,7 @@ protected void analyzeCommand(TableName tableName, Map partition throw new SemanticException(ErrorMsg.PARTITION_VALUE_NOT_CONTINUOUS.getMsg(partitionSpecs.toString())); } - List destProjectFilters = MetaStoreUtils.getPvals(destTable.getEffectivePartCols(), partitionSpecs); + List destProjectFilters = MetaStoreUtils.getPvals(destTable.getPartCols(), partitionSpecs); // check if dest partition exists GetPartitionsFilterSpec getDestPartitionsFilterSpec = new GetPartitionsFilterSpec(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java index 4a7aea0490f3..c0bffcebdb23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/show/ShowPartitionAnalyzer.java @@ -102,7 +102,7 @@ ExprNodeDesc getShowPartitionsFilter(Table table, ASTNode command) throws Semant if (astChild.getType() == HiveParser.TOK_WHERE) { RowResolver rwsch = new RowResolver(); Map colTypes = new HashMap(); - for (FieldSchema fs : table.getEffectivePartCols()) { + for (FieldSchema fs : table.getPartCols()) { rwsch.put(table.getTableName(), fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.stringTypeInfo, null, true)); colTypes.put(fs.getName().toLowerCase(), fs.getType()); @@ -202,8 +202,8 @@ private String getShowPartitionsOrder(Table table, ASTNode command) throws Seman if (astChild.getType() == HiveParser.TOK_ORDERBY) { Map poses = new HashMap(); RowResolver rwsch = new RowResolver(); - for (int i = 0; i < table.getEffectivePartCols().size(); i++) { - FieldSchema fs = table.getEffectivePartCols().get(i); + for (int i = 0; i < table.getPartCols().size(); i++) { + FieldSchema fs = table.getPartCols().get(i); rwsch.put(table.getTableName(), 
fs.getName(), new ColumnInfo(fs.getName(), TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()), null, true)); poses.put(fs.getName().toLowerCase(), i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java index 67f5b7946760..e218e590a24e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/archive/AlterTableArchiveOperation.java @@ -129,7 +129,7 @@ private Path getOriginalDir(Table table, PartSpecInfo partitionSpecInfo, List partitionColumns) throws SemanticException { - if (oldView.getEffectivePartCols().isEmpty() || oldView.getEffectivePartCols().equals(partitionColumns)) { + if (oldView.getPartCols().isEmpty() || oldView.getPartCols().equals(partitionColumns)) { return; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index df2a92202bcc..e333ed85f439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -73,7 +73,7 @@ static public PartSpecInfo create(Table tbl, Map partSpec) // ARCHIVE PARTITION(hr='13') won't List prefixFields = new ArrayList(); List prefixValues = new ArrayList(); - List partCols = tbl.getEffectivePartCols(); + List partCols = tbl.getPartCols(); Iterator itrPsKeys = partSpec.keySet().iterator(); for (FieldSchema fs : partCols) { if (!itrPsKeys.hasNext()) { @@ -221,7 +221,7 @@ public static int getArchivingLevel(Partition p) throws HiveException { * @throws HiveException */ public static String getPartialName(Partition p, int level) throws HiveException { - List fields = p.getTable().getEffectivePartCols().subList(0, level); + List fields = p.getTable().getPartCols().subList(0, level); List values = 
p.getValues().subList(0, level); try { return Warehouse.makePartName(fields, values); @@ -272,7 +272,7 @@ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, Map spec = new HashMap(partSpec); List reversedKeys = new ArrayList(); - for (FieldSchema fs : tbl.getEffectivePartCols()) { + for (FieldSchema fs : tbl.getPartCols()) { if (spec.containsKey(fs.getName())) { reversedKeys.add(fs.getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index 3fe753de5e08..9196a3441200 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -293,7 +293,7 @@ public String getPartitionActualName(Partition pt) { */ private Map getPartitionColumnToPrimitiveCategory(Partition pt) { Map resultMap = new HashMap<>(); - for (FieldSchema schema: pt.getTable().getEffectivePartCols()) { + for (FieldSchema schema: pt.getTable().getPartCols()) { resultMap.put( schema.getName(), ((PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(schema.getType())).getPrimitiveCategory() @@ -975,7 +975,7 @@ private String getComment(Table table) { } private String getPartitionsForView(Table table) { - List partitionKeys = table.getEffectivePartCols(); + List partitionKeys = table.getPartCols(); if (partitionKeys.isEmpty()) { return ""; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index a5ec3a023092..3eca5531f127 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -633,7 +633,7 @@ public void logMessage(LoadTableDesc tbd) { private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, TaskInformation ti) throws HiveException, IOException, InvalidOperationException { - List partVals = 
MetaStoreUtils.getPvals(table.getEffectivePartCols(), tbd.getPartitionSpec()); + List partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec()); db.validatePartitionNameCharacters(partVals); if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("loadPartition called from " + tbd.getSourcePath() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 9cd67fcb9619..a29e532b113b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -4321,7 +4321,7 @@ public static void setPartitionColumnNames(Configuration conf, TableScanOperator if (metadata == null) { return; } - List partCols = metadata.getEffectivePartCols(); + List partCols = metadata.getPartCols(); if (partCols != null && !partCols.isEmpty()) { conf.set(serdeConstants.LIST_PARTITION_COLUMNS, MetaStoreUtils.getColumnNamesFromFieldSchema(partCols)); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java index a7829d7b78bf..9287fd75e766 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java @@ -548,7 +548,7 @@ public static Task createViewTask(MetaData metaData, String dbNameToLoadIn, H } CreateViewDesc desc = new CreateViewDesc(dbDotView, table.getCols(), null, table.getParameters(), - table.getPartColNames(), false, false, viewOriginalText, viewExpandedText, table.getEffectivePartCols()); + table.getPartColNames(), false, false, viewOriginalText, viewExpandedText, table.getPartCols()); desc.setReplicationSpec(metaData.getReplicationSpec()); desc.setOwnerName(table.getOwner()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index 782b4f6e5258..9f871d05feb3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -90,7 +90,7 @@ public List getValues() { Table table = this.getTable(); values = new ArrayList<>(); - for (FieldSchema fs : table.getEffectivePartCols()) { + for (FieldSchema fs : table.getPartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 0c583264816b..cd33896807bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -869,7 +869,7 @@ public void createTable(String tableName, List columns, List par FieldSchema part = new FieldSchema(); part.setName(partCol); part.setType(STRING_TYPE_NAME); // default partition key - tbl.getEffectivePartCols().add(part); + tbl.getPartCols().add(part); } } tbl.setSerializationLib(LazySimpleSerDe.class.getName()); @@ -1246,8 +1246,8 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio throws HiveException { try { Map newPartSpec = newPart.getSpec(); - if (oldPartSpec.keySet().size() != tbl.getEffectivePartCols().size() - || newPartSpec.keySet().size() != tbl.getEffectivePartCols().size()) { + if (oldPartSpec.keySet().size() != tbl.getPartCols().size() + || newPartSpec.keySet().size() != tbl.getPartCols().size()) { throw new HiveException("Unable to rename partition to the same name: number of partition cols don't match. 
"); } if (!oldPartSpec.keySet().equals(newPartSpec.keySet())){ @@ -1255,7 +1255,7 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio } List pvals = new ArrayList(); - for (FieldSchema field : tbl.getEffectivePartCols()) { + for (FieldSchema field : tbl.getPartCols()) { String val = oldPartSpec.get(field.getName()); if (val == null || val.length() == 0) { throw new HiveException("get partition: Value for key " @@ -3832,7 +3832,7 @@ public Partition getPartition(Table tbl, Map partSpec, boolean forceCreate, String partPath, boolean inheritTableSpecs) throws HiveException { tbl.validatePartColumnNames(partSpec, true); List pvals = new ArrayList(); - for (FieldSchema field : tbl.getEffectivePartCols()) { + for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); // enable dynamic partitioning if ((val == null && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMIC_PARTITIONING)) @@ -4221,7 +4221,7 @@ public List getPartitionNames(Table tbl, Map partSpec, s if (tbl.hasNonNativePartitionSupport()) { return tbl.getStorageHandler().getPartitionNames(tbl, partSpec); } - List pvals = MetaStoreUtils.getPvals(tbl.getEffectivePartCols(), partSpec); + List pvals = MetaStoreUtils.getPvals(tbl.getPartCols(), partSpec); return getPartitionNamesByPartitionVals(tbl, pvals, max); } @@ -4463,7 +4463,7 @@ private List getPartitionsWithAuth(Table tbl, Map par throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED, tbl.getTableName()); } - List partialPvals = MetaStoreUtils.getPvals(tbl.getEffectivePartCols(), partialPartSpec); + List partialPvals = MetaStoreUtils.getPvals(tbl.getPartCols(), partialPartSpec); List partitions = null; try { @@ -4772,7 +4772,7 @@ static List convertFromPartSpec(Iterator iterator, Tab || partitionWithoutSD.getRelativePath().isEmpty()) { if (tbl.getDataLocation() != null) { Path partPath = new Path(tbl.getDataLocation(), - Warehouse.makePartName(tbl.getEffectivePartCols(), + 
Warehouse.makePartName(tbl.getPartCols(), partitionWithoutSD.getValues())); partitionLocation = partPath.toString(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 2453a0c64358..ee55fde100f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -429,7 +429,7 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable // 1.2 Add column info corresponding to partition columns ArrayList partitionColumns = new ArrayList(); - for (FieldSchema partCol : viewTable.getEffectivePartCols()) { + for (FieldSchema partCol : viewTable.getPartCols()) { colName = partCol.getName(); colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), null, true); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 2e24dcd0087b..4715775d3b4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -124,7 +124,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { List pvals = new ArrayList<>(); - for (FieldSchema field : tbl.getEffectivePartCols()) { + for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) { throw new HiveException("partition spec is invalid; field " @@ -173,7 +173,7 @@ protected void initialize(Table table, // table partition (not a view partition) if (table.getDataLocation() != null) { Path partPath = new Path(table.getDataLocation(), - 
Warehouse.makePartName(table.getEffectivePartCols(), tPartition.getValues())); + Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } } @@ -200,7 +200,7 @@ protected void initialize(Table table, public String getName() { try { - return Warehouse.makePartName(table.getEffectivePartCols(), tPartition.getValues()); + return Warehouse.makePartName(table.getPartCols(), tPartition.getValues()); } catch (MetaException e) { throw new RuntimeException(e); } @@ -543,7 +543,7 @@ public void setLocation(String location) { public void setValues(Map partSpec) throws HiveException { List pvals = new ArrayList<>(); - for (FieldSchema field : table.getEffectivePartCols()) { + for (FieldSchema field : table.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null) { throw new HiveException( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 3fe4df59676f..f841ff7b9fc7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -32,6 +32,7 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -603,7 +604,7 @@ public boolean equals(Object obj) { && Objects.equals(snapshotRef, other.snapshotRef); } - public List getPartCols() { + private List getNativePartCols() { List partKeys = tTable.getPartitionKeys(); if (partKeys == null) { partKeys = new ArrayList<>(); @@ -616,14 +617,14 @@ public List getPartCols() { * Returns partition columns, consulting the storage handler for non-native tables (e.g. Iceberg) * where partition columns are not stored in the metastore. 
*/ - public List getEffectivePartCols() { + public List getPartCols() { if (cachedPartCols != null) { return cachedPartCols; } if (isTableTypeSet() && hasNonNativePartitionSupport()) { cachedPartCols = getStorageHandler().getPartitionKeys(this); } else { - cachedPartCols = getPartCols(); + cachedPartCols = getNativePartCols(); } return cachedPartCols; } @@ -643,7 +644,7 @@ public FieldSchema getPartColByName(String colName) { } public List getPartColNames() { - return getEffectivePartCols().stream().map(FieldSchema::getName).toList(); + return getPartCols().stream().map(FieldSchema::getName).toList(); } public boolean hasNonNativePartitionSupport() { @@ -758,7 +759,17 @@ private boolean isField(String col) { } public List getCols() { - return getColsInternal(false); + if (!isNonNative()) { + return getColsInternal(false); + } + List nonPartFields = new ArrayList<>(); + Set partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); + for (FieldSchema field : getColsInternal(false)) { + if (!partFieldsName.contains(field.getName())) { + nonPartFields.add(field); + } + } + return nonPartFields; } public List getColsForMetastore() { @@ -842,7 +853,7 @@ public void setOutputFormatClass(String name) throws HiveException { public boolean isPartitioned() { return hasNonNativePartitionSupport() ? 
getStorageHandler().isPartitioned(this) : - CollectionUtils.isNotEmpty(getEffectivePartCols()); + CollectionUtils.isNotEmpty(getPartCols()); } public void setFields(List fields) { @@ -1040,7 +1051,7 @@ public boolean isMaterializedView() { public Map createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { - List fsl = getEffectivePartCols(); + List fsl = getPartCols(); List tpl = tp.getValues(); Map spec = LinkedHashMap.newLinkedHashMap(fsl.size()); for (int i = 0; i < fsl.size(); i++) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index c560783f070f..bd1be003a512 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -2149,7 +2149,7 @@ static void usePartitionColumns(Properties properties, Table table, List if (properties.containsKey(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS)) { usePartitionColumns(properties, partColNames); } else { - List partCols = table.getEffectivePartCols(); + List partCols = table.getPartCols(); String partNames = partCols.stream().map(FieldSchema::getName).collect(Collectors.joining("/")); String partTypes = partCols.stream().map(FieldSchema::getType).collect(Collectors.joining(":")); properties.setProperty( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index dd4478e0acc9..d08fe92208ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -58,7 +58,7 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv if (!partSpec.keySet().containsAll(expr.getCols())) { return null; } - partKeyTypes = 
p.getTable().getEffectivePartCols().stream().map(FieldSchema::getType) + partKeyTypes = p.getTable().getPartCols().stream().map(FieldSchema::getType) .toArray(String[]::new); } else { String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 26fba8533ecc..7574ad5f6d24 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -521,7 +521,7 @@ static private boolean pruneBySequentialScan(Table tab, List partitio } private static List extractPartColTypes(Table tab) { - List pCols = tab.getEffectivePartCols(); + List pCols = tab.getPartCols(); List partColTypeInfos = new ArrayList<>(pCols.size()); for (FieldSchema pCol : pCols) { partColTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(pCol.getType())); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java index c42f39fa6309..05f3b85f271f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java @@ -175,7 +175,7 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN //now generate insert statement //insert into newTableName select * from ts StringBuilder rewrittenQueryStr = generateExportQuery( - newTable.getEffectivePartCols(), + newTable.getPartCols(), tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr); Context rewrittenCtx = rr.rewrittenCtx; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 211462775911..f616049d8591 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1202,7 +1202,7 @@ public TableSpec(Table tableHandle, List partitions) { if (partitions != null && !partitions.isEmpty()) { this.specType = SpecType.STATIC_PARTITION; this.partitions = partitions; - List partCols = this.tableHandle.getEffectivePartCols(); + List partCols = this.tableHandle.getPartCols(); this.partSpec = new LinkedHashMap<>(); for (FieldSchema partCol : partCols) { partSpec.put(partCol.getName(), null); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 5c9f898bea13..9bf6269334e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -207,7 +207,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.getEffectivePartCols()) { + for (FieldSchema fs : tbl.getPartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } @@ -309,7 +309,7 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas, if (isPartitionStats) { if (partTransformSpec == null) { - for (FieldSchema fs : tbl.getEffectivePartCols()) { + for (FieldSchema fs : tbl.getPartCols()) { String identifier = unparseIdentifier(fs.getName(), conf); rewrittenQueryBuilder.append(", ").append(identifier); columnNamesBuilder.append(", ").append(identifier); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 
e671ad24d7b9..dcf197a2c201 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -876,7 +876,7 @@ private static void checkTable(Table table, ImportTableDesc tableDesc, } { // check partitioning column order and types - List existingTablePartCols = table.getEffectivePartCols(); + List existingTablePartCols = table.getPartCols(); List importedTablePartCols = tableDesc.getPartCols(); if (!EximUtil.schemaCompare(importedTablePartCols, existingTablePartCols)) { throw new SemanticException( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 6f32b0a9293a..ddde37433b5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -431,7 +431,7 @@ private static final class OnClauseAnalyzer { HiveConf conf, String onClauseAsString) { this.onClause = onClause; allTargetTableColumns.addAll(targetTable.getCols()); - allTargetTableColumns.addAll(targetTable.getEffectivePartCols()); + allTargetTableColumns.addAll(targetTable.getPartCols()); this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery); this.conf = conf; this.onClauseAsString = onClauseAsString; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 0904e2c9b5c5..9edbe5b05e13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -575,7 +575,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new 
HashMap<>(); - List partitionKeys = table.getEffectivePartCols(); + List partitionKeys = table.getPartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } @@ -685,7 +685,7 @@ public static Map> getFullPartitionSpecs( */ private static int calculatePartPrefix(Table tbl, Set partSpecKeys) { int partPrefixToDrop = 0; - for (FieldSchema fs : tbl.getEffectivePartCols()) { + for (FieldSchema fs : tbl.getPartCols()) { if (!partSpecKeys.contains(fs.getName())) { break; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index e1ca15a76b5e..50d02acf2464 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2210,7 +2210,7 @@ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase } } else { // partition spec is not specified but column schema can have partitions specified - for(FieldSchema f : targetTable.getEffectivePartCols()) { + for(FieldSchema f : targetTable.getPartCols()) { //parser only allows foo(a,b), not foo(foo.a, foo.b) targetColumns.remove(f.getName()); } @@ -12306,7 +12306,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String if (tab.isPartitioned() && !tab.hasNonNativePartitionSupport()) { List cols = new ArrayList(); if (qbp.getAnalyzeRewrite() != null) { - List partitionCols = tab.getEffectivePartCols(); + List partitionCols = tab.getPartCols(); for (FieldSchema fs : partitionCols) { cols.add(fs.getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java index 928564012985..7587daf13055 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java @@ -152,7 +152,7 @@ public void appendPartColsOfTargetTableWithComma(String alias) { return; } queryStr.append(','); - appendCols(targetTable.getEffectivePartCols(), alias, null, FieldSchema::getName); + appendCols(targetTable.getPartCols(), alias, null, FieldSchema::getName); } public void appendAllColsOfTargetTable(String prefix) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java index 94cda746396b..87e426800442 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/NativeAcidMultiInsertSqlGenerator.java @@ -36,7 +36,7 @@ public NativeAcidMultiInsertSqlGenerator(Table table, String targetTableFullName @Override public void appendAcidSelectColumns(Operation operation) { queryStr.append("ROW__ID,"); - for (FieldSchema fieldSchema : targetTable.getEffectivePartCols()) { + for (FieldSchema fieldSchema : targetTable.getPartCols()) { String identifier = HiveUtils.unparseIdentifier(fieldSchema.getName(), this.conf); queryStr.append(identifier); queryStr.append(","); @@ -45,9 +45,9 @@ public void appendAcidSelectColumns(Operation operation) { @Override public List getDeleteValues(Operation operation) { - List deleteValues = new ArrayList<>(1 + targetTable.getEffectivePartCols().size()); + List deleteValues = new ArrayList<>(1 + targetTable.getPartCols().size()); deleteValues.add(qualify("ROW__ID")); - for (FieldSchema fieldSchema : targetTable.getEffectivePartCols()) { + for (FieldSchema fieldSchema : targetTable.getPartCols()) { deleteValues.add(qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf))); } return deleteValues; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java index cb257ee216f2..be62d94019ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java @@ -156,7 +156,7 @@ private boolean constructColumnStatsFromPackedRows(Table tbl, List Date: Sun, 24 May 2026 13:22:20 +0530 Subject: [PATCH 09/20] introduced index optimization --- .../apache/hadoop/hive/ql/metadata/Table.java | 67 ++++++++++++++----- .../parse/ColumnStatsAutoGatherContext.java | 23 ++++--- 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index f841ff7b9fc7..e379f39aa29c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -32,6 +32,7 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; +import java.util.TreeMap; import java.util.stream.Collectors; import java.util.stream.Collectors; @@ -39,6 +40,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -115,7 +117,8 @@ public class Table implements Serializable { /** * These fields are all cached fields. The information comes from tTable. 
*/ - private List cachedPartCols; + private List tablePartCols; + private Map> inputColumnIndexByName; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -200,8 +203,8 @@ public Table makeCopy() { newTab.setMetaTable(this.getMetaTable()); newTab.setSnapshotRef(this.getSnapshotRef()); - if (this.cachedPartCols != null) { - newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols); + if (this.tablePartCols != null) { + newTab.tablePartCols = new ArrayList<>(this.tablePartCols); } return newTab; } @@ -618,15 +621,15 @@ private List getNativePartCols() { * where partition columns are not stored in the metastore. */ public List getPartCols() { - if (cachedPartCols != null) { - return cachedPartCols; + if (tablePartCols != null) { + return tablePartCols; } if (isTableTypeSet() && hasNonNativePartitionSupport()) { - cachedPartCols = getStorageHandler().getPartitionKeys(this); + tablePartCols = getStorageHandler().getPartitionKeys(this); } else { - cachedPartCols = getNativePartCols(); + tablePartCols = getNativePartCols(); } - return cachedPartCols; + return tablePartCols; } private boolean isTableTypeSet() { @@ -758,18 +761,44 @@ private boolean isField(String col) { return false; } + private void fillColumnIndexByName() { + inputColumnIndexByName = new HashMap<>(); + List fsList = new ArrayList<>(getColsInternal(false)); + if (!isNonNative()) { + fsList.addAll(getNativePartCols()); + } + for (int i = 0; i < fsList.size(); i++) { + inputColumnIndexByName.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); + } + } + + public int getColumnIndexByName(String colName) { + if (inputColumnIndexByName == null) { + fillColumnIndexByName(); + } + return inputColumnIndexByName.get(colName.toLowerCase()).getLeft(); + } + + public FieldSchema getFieldSchemaByName(String colName) { + if (inputColumnIndexByName == null) { + fillColumnIndexByName(); + } + return inputColumnIndexByName.get(colName).getRight(); + } + 
public List getCols() { if (!isNonNative()) { return getColsInternal(false); - } - List nonPartFields = new ArrayList<>(); - Set partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); - for (FieldSchema field : getColsInternal(false)) { - if (!partFieldsName.contains(field.getName())) { - nonPartFields.add(field); + } else { + List nonPartFields = new ArrayList<>(); + Set partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); + for (FieldSchema field : getColsInternal(false)) { + if (!partFieldsName.contains(field.getName())) { + nonPartFields.add(field); + } } + return nonPartFields; } - return nonPartFields; } public List getColsForMetastore() { @@ -802,9 +831,11 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList allCols = new ArrayList<>(getCols()); - allCols.addAll(getPartCols()); - return allCols; + List fsList = new ArrayList<>(getColsInternal(false)); + if (!isNonNative()) { + fsList.addAll(getNativePartCols()); + } + return fsList; } public void setPartCols(List partCols) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 9109f9cb6086..373e7bc0cf98 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -262,8 +262,10 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator 0) { throw new SemanticException( "Dynamic partition columns should not come before static partition columns."); } exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName)); srcType = exprNodeDesc.getTypeInfo(); + if (!tbl.hasNonNativePartitionSupport()) { + dynamicPartBegin--; + } } // 3. 
dynamic partition columns else { - dynamicPartBegin++; - ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin); + ColumnInfo col = columns.get(tbl.getColumnIndexByName(partColName) + dynamicPartBegin); exprNodeDesc = new ExprNodeColumnDesc(col); srcType = col.getType(); } - TypeInfo destType = selRSSig.get(this.columns.size() + i).getType(); + TypeInfo destType = selRSSig.get(index).getType(); if (!srcType.equals(destType)) { // This may be possible when srcType is string but destType is integer exprNodeDesc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() .createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType); } colList.add(exprNodeDesc); - String internalName = selRS.getColumnNames().get(this.columns.size() + i); + String internalName = selRS.getColumnNames().get(index); columnNames.add(internalName); columnExprMap.put(internalName, exprNodeDesc); - signature.add(selRSSig.get(this.columns.size() + i)); + signature.add(selRSSig.get(index)); } operator.setConf(new SelectDesc(colList, columnNames)); operator.setColumnExprMap(columnExprMap); From aad2a39787f38c0d82e9c818f907d83b087ca674 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Sun, 24 May 2026 19:11:28 +0530 Subject: [PATCH 10/20] corrected implementation --- .../java/org/apache/hadoop/hive/ql/metadata/Table.java | 4 ++++ .../org/apache/hadoop/hive/ql/parse/CalcitePlanner.java | 4 +++- .../hive/ql/parse/ColumnStatsAutoGatherContext.java | 9 ++++----- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index e379f39aa29c..3a93a6d7f421 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -786,6 +786,10 @@ public FieldSchema getFieldSchemaByName(String colName) { return inputColumnIndexByName.get(colName).getRight(); } + public List getStorageSchemaCols() { + return 
getColsInternal(false); + } + public List getCols() { if (!isNonNative()) { return getColsInternal(false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ffc1673f3670..68ebf8be17fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -353,6 +353,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -3013,7 +3014,8 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tabMetaData.getPartCols()) { + List partKeys = Objects.requireNonNullElse(tabMetaData.getPartitionKeys(), new ArrayList<>()); + for (FieldSchema part_col : partKeys) { colName = part_col.getName(); colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 373e7bc0cf98..3ff9d30955d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -265,7 +265,7 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator columnNameToIndex) { + private Integer getSelRSColumnIndex(String columnName, ColumnInfo col, Map columnNameToIndex) { ObjectInspector objectInspector = col.getObjectInspector(); if (objectInspector == null) { return null; @@ -331,7 +330,7 @@ private Integer getSelRSColumnIndex(int i, ColumnInfo col, Map if (!columnSupported) { return null; } 
- return columnNameToIndex.get(this.columns.get(i).getName()); + return columnNameToIndex.get(columnName); } public String getCompleteName() { From 27c649e9e088f7971fd776b011056d2491953234 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Mon, 25 May 2026 01:58:44 +0530 Subject: [PATCH 11/20] updated describe implementation and outputs --- .../alter_multi_part_table_to_iceberg.q.out | 15 --------------- .../positive/alter_part_table_to_iceberg.q.out | 5 ----- .../src/test/results/positive/col_stats.q.out | 4 ++-- .../ddl/table/info/desc/DescTableOperation.java | 2 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 5 ----- 5 files changed, 3 insertions(+), 28 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out index 910e48e4214e..c9697302c550 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out @@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -453,8 +451,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -729,8 +725,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -1066,9 +1060,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc_mixed # col_name data_type comment a int -b double -c int -d string # Partition Information # col_name data_type comment @@ -1513,9 +1504,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_mixed # col_name data_type comment a 
int -b double -c int -d string # Partition Information # col_name data_type comment @@ -1960,9 +1948,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro_mixed # col_name data_type comment a int -b double -c int -d string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out index 55bfee6eb031..8646ebb240e7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out @@ -137,7 +137,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -415,7 +414,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -770,7 +768,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_int # col_name data_type comment a int -b int # Partition Information # col_name data_type comment @@ -1125,7 +1122,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_double # col_name data_type comment a int -b double # Partition Information # col_name data_type comment @@ -1426,7 +1422,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out index 6810c72c93e8..ef5c4410fbb6 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out @@ -347,7 +347,7 @@ PREHOOK: Input: default@tbl_ice_puffin POSTHOOK: 
query: desc formatted tbl_ice_puffin C POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_puffin -col_name C +col_name c data_type int min 52 max 56 @@ -358,7 +358,7 @@ max_col_len num_trues num_falses bit_vector HL -comment +comment Transform: identity COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a) PREHOOK: type: QUERY diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index 9086ad90c677..8d45892e17fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -200,7 +200,7 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List tableProps = table.getParameters() == null ? new HashMap<>() : table.getParameters(); - if (partitionCol != null) { + if (partitionCol != null && !table.hasNonNativePartitionSupport()) { addStatsForPartitionKeyColumn(table, colStats, colNames, tableProps, partitionCol); } else { addStatsForRegularColumn(table, colStats, colName, tableProps); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 50d02acf2464..c03fc60d049f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12020,8 +12020,6 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { deserializer.handleJobLevelConfiguration(conf); List fields = rowObjectInspector .getAllStructFieldRefs(); - Set partCols = tab.hasNonNativePartitionSupport() ? 
- Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); for (int i = 0; i < fields.size(); i++) { /** * if the column is a skewed column, use ColumnInfo accordingly @@ -12029,9 +12027,6 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i) .getFieldObjectInspector()), alias, false); - if (partCols.contains(colInfo.getInternalName())) { - colInfo.setHiddenPartitionCol(true); - } colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName())); rwsch.put(alias, fields.get(i).getFieldName(), colInfo); } From 11c279db36f1dea3e9d095e66921dabd484fe6d9 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Mon, 25 May 2026 13:35:59 +0530 Subject: [PATCH 12/20] updated api and test --- .../hive/TestHiveIcebergStorageHandlerNoScan.java | 14 ++++++++++---- .../llap/hadoop_catalog_create_table.q.out | 10 ---------- .../org/apache/hadoop/hive/ql/metadata/Table.java | 10 +++++++++- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index a320148858e0..4284ce7a26e2 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -946,7 +946,8 @@ public void testCreateTableWithoutColumnComments() { @Test public void testCreatePartitionedTableWithColumnComments() { TableIdentifier identifier = TableIdentifier.of("default", "partitioned_with_comment_table"); - String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", null}; + String[] expectedDoc = new String[] {"int column", "string column", 
null, "partition column", + "Transform: identity"}; shell.executeStatement("CREATE EXTERNAL TABLE partitioned_with_comment_table (" + "t_int INT COMMENT 'int column', " + "t_string STRING COMMENT 'string column', " + @@ -959,13 +960,18 @@ public void testCreatePartitionedTableWithColumnComments() { List rows = shell.executeStatement("DESCRIBE default.partitioned_with_comment_table"); List columns = icebergTable.schema().columns(); + List partitionColumns = List.of("t_string_3", "t_string_4"); // The partition transform information and partition information is 6 extra lines, and 4 more line for the columns Assert.assertEquals(columns.size() + 10, rows.size()); for (int i = 0; i < columns.size(); i++) { Types.NestedField field = columns.get(i); - Assert.assertArrayEquals(new Object[] {field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(), - field.doc() != null ? field.doc() : ""}, rows.get(i)); - Assert.assertEquals(expectedDoc[i], field.doc()); + String fieldDoc = field.doc(); + if (fieldDoc == null && partitionColumns.contains(field.name())) { + fieldDoc = "Transform: identity"; + } + Assert.assertArrayEquals(new Object[]{field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(), + fieldDoc != null ? 
fieldDoc : ""}, rows.get(i)); + Assert.assertEquals(expectedDoc[i], fieldDoc); } } diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out index 5036ca420f88..1543f272a9a9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out @@ -89,8 +89,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -172,8 +170,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -263,8 +259,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -352,8 +346,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -427,8 +419,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 3a93a6d7f421..342f846fca2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -625,7 +625,15 @@ public List getPartCols() { return tablePartCols; } if (isTableTypeSet() && hasNonNativePartitionSupport()) { - tablePartCols = getStorageHandler().getPartitionKeys(this); + List partCols = getStorageHandler().getPartitionKeys(this); + for (FieldSchema partCol : partCols) { + FieldSchema storageSchemaField = getFieldSchemaByName(partCol.getName()); + String storageSchemaComment = storageSchemaField.getComment(); + if 
(storageSchemaComment != null) { + partCol.setComment(storageSchemaComment); + } + } + tablePartCols = partCols; } else { tablePartCols = getNativePartCols(); } From 5b4e4766c8f26335abe0049ea4cb9d14d817c0e0 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Mon, 25 May 2026 16:01:31 +0530 Subject: [PATCH 13/20] updated update implementation --- .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 4 ++-- .../hive/ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +++--- .../ql/parse/rewrite/CopyOnWriteUpdateRewriter.java | 4 +++- .../hive/ql/parse/rewrite/SplitUpdateRewriter.java | 10 ++++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 3a4b1f1f8d7d..43c4eb9361da 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -1536,10 +1536,10 @@ public List acidSelectColumns(org.apache.hadoop.hive.ql.metadata.Ta case DELETE -> // TODO: make it configurable whether we want to include the table columns in the select query. // It might make delete writes faster if we don't have to write out the row object - ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols()); + ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getStorageSchemaCols()); case UPDATE -> shouldOverwrite(table, operation) ? 
ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA : - ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols()); + ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getStorageSchemaCols()); case MERGE -> ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA; default -> ImmutableList.of(); }; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 9bf6269334e7..622e28210f94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -107,7 +107,7 @@ private boolean shouldRewrite(ASTNode tree) { */ private static FieldSchemas getStatsEligibleFieldSchemas(Table tbl) { List result = new ArrayList<>(); - for (FieldSchema col : tbl.getCols()) { + for (FieldSchema col : tbl.getStorageSchemaCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); @@ -230,12 +230,12 @@ protected static List getFieldSchemasByColName(Table tbl, List col); } List result = new ArrayList<>(); - List tableColNames = new FieldSchemas(tbl.getCols()).getColName(); + List tableColNames = new FieldSchemas(tbl.getStorageSchemaCols()).getColName(); for (String colName : colNames) { FieldSchema fs = specifiedColsMap.get(colName.toLowerCase()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java index ae57198860c3..187547430e5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java @@ -21,7 +21,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.RowLineageUtils; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -90,7 +92,7 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB } Map setColExprs = new HashMap<>(updateBlock.getSetCols().size()); - List nonPartCols = updateBlock.getTargetTable().getCols(); + List nonPartCols = updateBlock.getTargetTable().getStorageSchemaCols(); for (int i = 0; i < nonPartCols.size(); i++) { if (columnOffset > 0 || i > 0) { sqlGenerator.append(','); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java index d14ddc7eb485..1a29f8486e85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java @@ -20,7 +20,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -65,10 +67,10 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB List deleteValues = sqlGenerator.getDeleteValues(OPERATION); int columnOffset = deleteValues.size(); - List insertValues = new ArrayList<>(updateBlock.getTargetTable().getCols().size()); + List insertValues = new 
ArrayList<>(updateBlock.getTargetTable().getStorageSchemaCols().size()); boolean first = true; - List nonPartCols = updateBlock.getTargetTable().getCols(); + List nonPartCols = updateBlock.getTargetTable().getStorageSchemaCols(); for (int i = 0; i < nonPartCols.size(); i++) { if (first) { first = false; @@ -98,8 +100,8 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB insertValues.add(sqlGenerator.qualify(identifier)); } - if (updateBlock.getTargetTable().getPartCols() != null) { - updateBlock.getTargetTable().getPartCols().forEach( + if (updateBlock.getTargetTable().getPartitionKeys() != null) { + updateBlock.getTargetTable().getPartitionKeys().forEach( fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); } addRowLineageColumnsForUpdate(updateBlock.getTargetTable(), sqlGenerator, insertValues, conf); From 63c2ac263733e2beb041fbe35e7401a1b15ee9e5 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Mon, 25 May 2026 20:28:14 +0530 Subject: [PATCH 14/20] updated partition pruning and query rewriting --- .../DynamicPartitionPruningOptimization.java | 14 ++++++++------ .../hive/ql/parse/RewriteSemanticAnalyzer.java | 7 ++++--- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +++++++- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index dba737e382c0..d7efbd1e838e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import 
org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -169,7 +170,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje Table table = ts.getConf().getTableMetadata(); boolean nonEquiJoin = isNonEquiJoin(ctx.parent); - if (table != null && table.isPartitionKey(column) && !nonEquiJoin) { + // Non-native tables (e.g. Iceberg) share a single table location for all partitions, so + // path-based DPP cannot work; prefer split-level pruning when the storage handler supports it. + if (table != null && table.isNonNative() && !nonEquiJoin + && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { + String columnType = table.getFieldSchemaByName(column).getType(); + generateEventOperatorPlan(ctx, parseContext, ts, column, columnType, ctx.parent); + } else if (table != null && table.isPartitionKey(column) && !nonEquiJoin) { String columnType = table.getPartColByName(column).getType(); String alias = ts.getConf().getAlias(); PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts); @@ -191,11 +198,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje // all partitions have been statically removed LOG.debug("No partition pruning necessary."); } - } else if (table.isNonNative() && - table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { - generateEventOperatorPlan(ctx, parseContext, ts, column, - table.getCols().stream().filter(e -> e.getName().equals(column)). 
- map(e -> e.getType()).findFirst().get(), ctx.parent); } else { // semijoin LOG.debug("Column " + column + " is not a partition column"); if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 101f6b1fc3d8..a21ce9e60cef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -43,6 +43,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; /** @@ -117,9 +118,9 @@ protected void analyzeRewrittenTree(ASTNode rewrittenTree, Context rewrittenCtx) */ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) throws SemanticException { String columnName = normalizeColName(colName.getText()); - + List partKeys = Objects.requireNonNullElse(targetTable.getPartitionKeys(), new ArrayList<>()); // Make sure this isn't one of the partitioning columns, that's not supported. 
- for (FieldSchema fschema : targetTable.getPartCols()) { + for (FieldSchema fschema : partKeys) { if (fschema.getName().equalsIgnoreCase(columnName)) { throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); } @@ -129,7 +130,7 @@ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) thr throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE, columnName); } boolean foundColumnInTargetTable = false; - for (FieldSchema col : targetTable.getCols()) { + for (FieldSchema col : targetTable.getStorageSchemaCols()) { if (columnName.equalsIgnoreCase(col.getName())) { foundColumnInTargetTable = true; break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index c03fc60d049f..dd68b572c2f0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12020,6 +12020,8 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { deserializer.handleJobLevelConfiguration(conf); List fields = rowObjectInspector .getAllStructFieldRefs(); + Set partCols = tab.hasNonNativePartitionSupport() ? 
+ Sets.newHashSet(tab.getPartColNames()) : Collections.emptySet(); for (int i = 0; i < fields.size(); i++) { /** * if the column is a skewed column, use ColumnInfo accordingly @@ -12027,6 +12029,9 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { ColumnInfo colInfo = new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i) .getFieldObjectInspector()), alias, false); + if (partCols.contains(colInfo.getInternalName())) { + colInfo.setHiddenPartitionCol(true); + } colInfo.setSkewedCol(isSkewedCol(alias, qb, fields.get(i).getFieldName())); rwsch.put(alias, fields.get(i).getFieldName(), colInfo); } @@ -12035,7 +12040,8 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns - for (FieldSchema partCol : tab.getPartCols()) { + List partKeys = Objects.requireNonNullElse(tab.getPartitionKeys(),new ArrayList<>()); + for (FieldSchema partCol : partKeys) { LOG.trace("Adding partition col: " + partCol); rwsch.put(alias, partCol.getName(), new ColumnInfo(partCol.getName(), TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), alias, true)); From 2742cf85b3742e41117b1a199fe1fd8fa9a8e6dc Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Mon, 25 May 2026 23:31:49 +0530 Subject: [PATCH 15/20] changes related to metatable --- .../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 6 ++++++ .../llap/iceberg_major_compaction_partition_evolution.q.out | 4 ---- .../ql/optimizer/DynamicPartitionPruningOptimization.java | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 43c4eb9361da..fdad33a2583b 100644 --- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -2130,6 +2130,9 @@ public List getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm } public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (hmsTable.getMetaTable() != null) { + return false; + } if (!hmsTable.getTTable().isSetId()) { return false; } @@ -2275,6 +2278,9 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table @Override public List getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (hmsTable.getMetaTable() != null) { + return Collections.emptyList(); + } if (!hmsTable.getTTable().isSetId()) { return Collections.emptyList(); } diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out index 981b8269ad51..115ad8903ffa 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out @@ -269,9 +269,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment @@ -866,9 +864,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index d7efbd1e838e..46b3ff129f49 
100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -176,7 +176,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { String columnType = table.getFieldSchemaByName(column).getType(); generateEventOperatorPlan(ctx, parseContext, ts, column, columnType, ctx.parent); - } else if (table != null && table.isPartitionKey(column) && !nonEquiJoin) { + } else if (table != null && table.isPartitionKey(column) && !nonEquiJoin + && !table.hasNonNativePartitionSupport()) { String columnType = table.getPartColByName(column).getType(); String alias = ts.getConf().getAlias(); PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts); From 24542d12090d3fd95ebcbc0e5296d21b3632ac6b Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Tue, 26 May 2026 11:39:52 +0530 Subject: [PATCH 16/20] corrected alter and semantic analyzer implementation --- ...mpaction_single_partition_with_evolution.q.out | 15 --------------- .../hadoop/hive/ql/ddl/table/AlterTableUtils.java | 6 +++++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out index 1d1143f4b635..5e24153c32a4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out @@ -158,9 +158,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string 
-registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -275,9 +272,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -392,9 +386,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -508,9 +499,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -624,9 +612,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java index 17a964a44583..ed1945ba657e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.ddl.table; +import java.util.ArrayList; +import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -75,7 +78,8 @@ public static boolean isSchemaEvolutionEnabled(Table table, Configuration conf) } public static boolean isFullPartitionSpec(Table table, Map partitionSpec) { - for (FieldSchema partitionCol : table.getPartCols()) { + List partKeys = Objects.requireNonNullElse(table.getPartitionKeys(), new ArrayList<>()); + for (FieldSchema partitionCol : partKeys) { if 
(partitionSpec.get(partitionCol.getName()) == null) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index dd68b572c2f0..98c3b4d64dd9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -5166,7 +5166,7 @@ RowResolver handleInsertStatementSpec(List col_list, String dest, } List newColList = new ArrayList(); colListPos = 0; - List targetTableCols = target != null ? target.getCols() : partition.getCols(); + List targetTableCols = target != null ? target.getStorageSchemaCols() : partition.getCols(); List targetTableColNames = new ArrayList(); List targetTableColTypes = new ArrayList(); for(FieldSchema fs : targetTableCols) { From 14467d892bd8bc643830aebf43f4c0f1b2517792 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Tue, 26 May 2026 16:49:45 +0530 Subject: [PATCH 17/20] updated merge implementation and test output --- .../llap/iceberg_create_locally_zordered_table.q.out | 1 - ...ceberg_major_compaction_partition_evolution2.q.out | 2 -- ...major_compaction_partition_evolution_ordered.q.out | 2 -- ...tion_partition_evolution_w_dyn_spec_w_filter.q.out | 4 ---- ...ction_partition_evolution_w_id_spec_w_filter.q.out | 4 ---- .../llap/iceberg_major_compaction_partitioned.q.out | 4 ---- .../iceberg_major_compaction_schema_evolution.q.out | 2 -- .../iceberg_major_compaction_single_partition.q.out | 9 --------- ..._compaction_single_partition_with_evolution2.q.out | 4 ---- .../llap/iceberg_minor_compaction_bucket.q.out | 2 -- ...iceberg_minor_compaction_partition_evolution.q.out | 3 --- .../positive/llap/vectorized_iceberg_read_mixed.q.out | 4 ++-- .../positive/llap/vectorized_iceberg_read_orc.q.out | 4 ++-- .../llap/vectorized_iceberg_read_parquet.q.out | 4 ++-- .../storage/compact/AlterTableCompactAnalyzer.java | 2 +- 
.../hadoop/hive/ql/parse/MergeSemanticAnalyzer.java | 5 ++--- .../hadoop/hive/ql/parse/rewrite/MergeRewriter.java | 11 ++++++----- .../ql/parse/rewrite/sql/MultiInsertSqlGenerator.java | 2 +- 18 files changed, 16 insertions(+), 53 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out index 42f0631140fc..6308cdcb5ff5 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out @@ -640,7 +640,6 @@ POSTHOOK: Input: default@zorder_tsdl_test # col_name data_type comment ts timestamp dd double -ll int # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out index 3af63c58b6c2..7235f28d6c0b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out @@ -138,7 +138,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -225,7 +224,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out index 
924c6d9953fa..7e317557d953 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out @@ -130,7 +130,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint company_id bigint @@ -280,7 +279,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint company_id bigint diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out index 61a931d74a33..296194cb8912 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out @@ -225,8 +225,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment event_id int -event_time timestamp with local time zone -event_src string # Partition Information # col_name data_type comment @@ -336,8 +334,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment event_id int -event_time timestamp with local time zone -event_src string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out index bc786c1e8dc8..2a9c688f714d 100644 --- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out @@ -176,9 +176,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment @@ -289,9 +287,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out index 5508bdca124c..33407f123d97 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out @@ -173,7 +173,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -276,7 +275,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -514,7 +512,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -621,7 +618,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out index 440f6334f114..2a1bfe817775 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out @@ -208,7 +208,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment fname string last_name string -dept_id bigint address string # Partition Information @@ -312,7 +311,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment fname string last_name string -dept_id bigint address string # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out index 4e120fb8c50d..90f23c830e76 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out @@ -177,9 +177,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment @@ -289,9 +286,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment @@ -408,9 +402,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out index b01185bb6911..1341c5356ef8 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out @@ -106,7 +106,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -200,7 +199,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -308,7 +306,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -416,7 +413,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out index fa5dcd05e101..8937967b79a7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out @@ -65,7 +65,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket_big # col_name data_type comment id string -key int value string # Partition Information @@ -183,7 +182,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
default@srcbucket_big # col_name data_type comment id string -key int value string # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out index 62070278505d..0d5d5d70771f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out @@ -94,7 +94,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -177,7 +176,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -286,7 +284,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out index 5f31e752db3f..afe15f60d91c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out @@ -876,10 +876,10 @@ PREHOOK: Input: default@tbl_ice_mixed_parted POSTHOOK: query: describe tbl_ice_mixed_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_mixed_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out index f1a017215639..c215c4005a46 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out @@ -605,10 +605,10 @@ PREHOOK: Input: default@tbl_ice_orc_parted POSTHOOK: query: describe tbl_ice_orc_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_orc_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out index 2feda580b67a..38a56ad9bdb7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out @@ -508,10 +508,10 @@ PREHOOK: Input: default@tbl_ice_parquet_parted POSTHOOK: query: describe tbl_ice_parquet_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_parquet_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java index b5bb11bcea2a..447ac10aeb22 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java @@ -99,7 +99,7 @@ 
protected void analyzeCommand(TableName tableName, Map partition Table table; try { table = getDb().getTable(tableName); - for (FieldSchema fs : table.getCols()) { + for (FieldSchema fs : table.getStorageSchemaCols()) { TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); rwsch.put(tableName.getTable(), fs.getName(), new ColumnInfo(fs.getName(), columnType, null, true)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index ddde37433b5a..c93b4a35a79d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -239,7 +239,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause //insert into target (p1) select current_date(), 5, c3, p1 where .... //since we take the RHS of set exactly as it was in Input, we don't need to deal with quoting/escaping column/table //names - List nonPartCols = targetTable.getCols(); + List nonPartCols = targetTable.getStorageSchemaCols(); Map colNameToDefaultConstraint = getColNameToDefaultValueMap(targetTable); for (FieldSchema fs : nonPartCols) { String name = fs.getName(); @@ -430,8 +430,7 @@ private static final class OnClauseAnalyzer { OnClauseAnalyzer(ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery, HiveConf conf, String onClauseAsString) { this.onClause = onClause; - allTargetTableColumns.addAll(targetTable.getCols()); - allTargetTableColumns.addAll(targetTable.getPartCols()); + allTargetTableColumns.addAll(targetTable.getAllCols()); this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery); this.conf = conf; this.onClauseAsString = onClauseAsString; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 
cb759a11d080..366b26a296f1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -238,10 +238,9 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau protected void addValues(Table targetTable, String targetAlias, Map newValues, List values) { - UnaryOperator formatter = name -> String.format("%s.%s", targetAlias, + UnaryOperator formatter = name -> String.format("%s.%s", targetAlias, HiveUtils.unparseIdentifier(name, conf)); - - for (FieldSchema fieldSchema : targetTable.getCols()) { + for (FieldSchema fieldSchema : targetTable.getStorageSchemaCols()) { if (newValues.containsKey(fieldSchema.getName())) { String rhsExp = newValues.get(fieldSchema.getName()); values.add(getRhsExpValue(rhsExp, formatter.apply(fieldSchema.getName()))); @@ -250,8 +249,10 @@ protected void addValues(Table targetTable, String targetAlias, Map values.add( - formatter.apply(fieldSchema.getName()))); + if (!targetTable.getPartitionKeys().isEmpty()) { + targetTable.getPartitionKeys().forEach(fieldSchema -> values.add( + formatter.apply(fieldSchema.getName()))); + } } protected String getRhsExpValue(String newValue, String alias) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java index 7587daf13055..c7b187c76dd3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java @@ -148,7 +148,7 @@ public void removeLastChar() { } public void appendPartColsOfTargetTableWithComma(String alias) { - if (targetTable.getPartCols() == null || targetTable.getPartCols().isEmpty()) { + if (targetTable.getPartitionKeys() == null || targetTable.getPartitionKeys().isEmpty()) { return; } queryStr.append(','); From 
ebb9e0124b2b34e8bc610338b8c1cb3c407094a8 Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Tue, 26 May 2026 19:13:31 +0530 Subject: [PATCH 18/20] updated ctas create and tests output --- .../results/positive/bucket_map_join_9.q.out | 16 +- .../ctas_iceberg_partitioned_orc.q.out | 2 - ...te_iceberg_copy_on_write_partitioned.q.out | 110 +++-- .../positive/desc_ice_tbl_part_spec.q.out | 2 + .../positive/describe_iceberg_table.q.out | 17 +- .../positive/dynamic_partition_writes.q.out | 16 +- .../positive/iceberg_drop_column.q.out | 4 +- .../iceberg_insert_into_partition.q.out | 168 +++---- ...erg_insert_into_partition_transforms.q.out | 80 ++- ...insert_into_partition_with_evolution.q.out | 3 +- .../iceberg_insert_overwrite_partition.q.out | 143 +++--- ...nsert_overwrite_partition_transforms.q.out | 66 ++- .../results/positive/iceberg_load_data.q.out | 44 +- .../llap/iceberg_bucket_map_join_1.q.out | 92 ++-- .../llap/iceberg_bucket_map_join_2.q.out | 64 +-- .../llap/iceberg_bucket_map_join_3.q.out | 44 +- .../llap/iceberg_bucket_map_join_4.q.out | 52 +- .../llap/iceberg_bucket_map_join_5.q.out | 44 +- .../llap/iceberg_bucket_map_join_6.q.out | 32 +- .../llap/iceberg_bucket_map_join_7.q.out | 168 +++---- .../llap/iceberg_bucket_map_join_8.q.out | 100 ++-- .../iceberg_row_lineage_compactions.q.out | 2 +- .../positive/mv_iceberg_partitioned_orc.q.out | 2 - .../mv_iceberg_partitioned_orc2.q.out | 4 - .../src/test/results/positive/row_count.q.out | 4 - .../positive/show_partitions_test.q.out | 5 - .../truncate_partitioned_iceberg_table.q.out | 2 - ...te_iceberg_copy_on_write_partitioned.q.out | 462 +++++++++--------- .../update_iceberg_partitioned_avro.q.out | 4 +- .../update_iceberg_partitioned_orc.q.out | 4 +- .../update_iceberg_partitioned_parquet.q.out | 4 +- .../vectorized_iceberg_merge_mixed.q.out | 54 +- .../vectorized_iceberg_read_mixed.q.out | 4 +- .../vectorized_iceberg_read_orc.q.out | 4 +- .../vectorized_iceberg_read_parquet.q.out | 4 +- 
.../table/create/CreateTableOperation.java | 2 +- .../hadoop/hive/ql/exec/DDLPlanUtils.java | 2 +- 37 files changed, 898 insertions(+), 932 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out b/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out index 8153bdd697f3..23b2d41d3dd0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out @@ -34,21 +34,21 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized - File Output Operator [FS_53] - Map Join Operator [MAPJOIN_52] (rows=2 width=530) - BucketMapJoin:true,Conds:SEL_51._col1, _col2=RS_49._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + File Output Operator [FS_23] + Map Join Operator [MAPJOIN_22] (rows=2 width=530) + BucketMapJoin:true,Conds:SEL_21._col1, _col2=RS_19._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 2 [CUSTOM_EDGE] vectorized - MULTICAST [RS_49] + MULTICAST [RS_19] PartitionCols:_col2, _col1 - Select Operator [SEL_48] (rows=2 width=265) + Select Operator [SEL_18] (rows=2 width=265) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_47] (rows=2 width=265) + Filter Operator [FIL_17] (rows=2 width=265) predicate:(id is not null and part is not null) TableScan [TS_3] (rows=2 width=265) default@tbl,tbl2,Tbl:COMPLETE,Col:COMPLETE,Output:["foid","part","id"] - <-Select Operator [SEL_51] (rows=2 width=265) + <-Select Operator [SEL_21] (rows=2 width=265) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_50] (rows=2 width=265) + Filter Operator [FIL_20] (rows=2 width=265) predicate:(id is not null and part is not null) TableScan [TS_0] (rows=2 width=265) default@tbl,tbl,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:100,Grouping Partition Columns:["id","part"],Output:["foid","part","id"] diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out index 0d1700ff07a9..eba54dc24523 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out @@ -273,8 +273,6 @@ POSTHOOK: query: describe formatted tbl_ice POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice # col_name data_type comment -a int -b string c int # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out index 6e6d2da48e8c..decc037d088b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out @@ -67,12 +67,12 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[223][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[229][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[221][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[231][tables = 
[$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[220][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[218][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[216][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[226][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -753,12 +753,12 @@ STAGE PLANS: Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[223][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[229][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[221][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[231][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[220][tables = [$hdt$_0, $hdt$_1, 
$hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[218][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[216][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[226][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product PREHOOK: query: delete from tbl_ice where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -799,7 +799,7 @@ POSTHOOK: query: insert into tbl_ice_other values (10, 'ten'), (333, 'hundred') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_other -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1175,12 +1175,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -1207,7 +1219,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1251,7 +1263,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1273,13 +1285,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) - Reducer 11 <- Reducer 9 (SIMPLE_EDGE) - Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 
(SIMPLE_EDGE) + Reducer 13 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 5 (CONTAINS) @@ -1365,19 +1377,6 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 10 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -1396,7 +1395,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 12 + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1412,7 +1411,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1458,6 +1457,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + 
mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1604,17 +1616,6 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: _col1 (type: int) minReductionHashAggr: 0.99 @@ -1643,6 +1644,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -1679,7 +1691,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[155][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product 
PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out b/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out index 9ae5ea93e5f8..e75c252fcfdd 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out @@ -74,6 +74,8 @@ a int b string c int d string +c int Transform: identity +d string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out index fb1cdbcaf12a..b494e9f54e62 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out @@ -109,13 +109,6 @@ POSTHOOK: query: DESCRIBE FORMATTED ice_t_transform POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform # col_name data_type comment -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -182,13 +175,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform_prop # col_name data_type comment id int -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -255,7 +241,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_identity_part # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -338,7 +323,7 @@ max_col_len num_trues 
num_falses bit_vector -comment +comment Transform: year COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bucket_field\":\"true\",\"day_field\":\"true\",\"hour_field\":\"true\",\"identity_field\":\"true\",\"month_field\":\"true\",\"truncate_field\":\"true\",\"year_field\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED ice_t_transform_prop id PREHOOK: type: DESCTABLE diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out index 0c58a118a7ce..cd09db327734 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out @@ -806,7 +806,7 @@ Stage-3 Group By Operator [GBY_15] (rows=21 width=584) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(id)","max(id)","count(1)","count(id)","compute_bit_vector_hll(id)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"],keys:iceberg_truncate(id, 2) Select Operator [SEL_14] (rows=22 width=87) - Output:["id","ccy"] + Output:["ccy","id"] Please refer to the previous Select Operator [SEL_12] PREHOOK: query: insert into table tbl_target_truncate_int select a, b from tbl_src @@ -1101,7 +1101,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_year(date_time_date) Select Operator [SEL_7] (rows=1 
width=240) - Output:["id","date_time_date","year_partition"] + Output:["id","year_partition","date_time_date"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_year_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1187,7 +1187,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_year(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_year_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1273,7 +1273,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_month(date_time_date) Select Operator [SEL_7] (rows=1 width=240) - Output:["id","date_time_date","year_partition"] + 
Output:["id","year_partition","date_time_date"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_month_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1359,7 +1359,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_month(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_month_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1445,7 +1445,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_day(date_time_date) Select Operator [SEL_7] (rows=1 width=240) - Output:["id","date_time_date","year_partition"] + Output:["id","year_partition","date_time_date"] Please refer to the previous Select 
Operator [SEL_3] PREHOOK: query: insert into tbl_day_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1531,7 +1531,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_day(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_day_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1617,7 +1617,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_hour(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into 
tbl_hour_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out index 74d891eefc1e..430591d0d01b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out @@ -27,8 +27,8 @@ POSTHOOK: Input: default@ice_tbl col_name data_type comment strcol string intcol int -pcol string -datecol date +pcol string Transform: identity +datecol date Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out index c68cb256cd60..86a8569e9ad9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out @@ -62,7 +62,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -179,7 +179,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('2') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: COMPLETE Group By Operator @@ -289,22 +289,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('3') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -315,14 +315,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 496 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -400,22 +400,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('04') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), 
avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -426,14 +426,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), 
COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -479,7 +479,6 @@ POSTHOOK: Input: default@ice_parquet_int # col_name data_type comment strcol string intcol int -pcol int # Partition Information # col_name data_type comment @@ -603,7 +602,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -721,7 +720,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: 
string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -839,7 +838,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'TX' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1086,7 +1085,7 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1209,7 +1208,7 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'CA' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1332,7 +1331,7 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: 
string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'TX' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1457,7 +1456,7 @@ STAGE PLANS: Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1582,7 +1581,7 @@ STAGE PLANS: Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1676,8 +1675,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -2187,7 +2184,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: 
_col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2304,7 +2301,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2421,7 +2418,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2544,22 +2541,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2570,14 +2567,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, 
_col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2654,22 +2651,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.9166667 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 
(type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2680,14 +2677,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11808 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2733,8 +2730,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -2884,7 +2879,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3001,7 +2996,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3118,7 +3113,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('45637829068876994') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3241,22 +3236,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('45637829068876994') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 
408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3267,14 +3262,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), 
UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3351,22 +3346,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.9166667 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 
Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3377,14 +3372,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 
Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11232 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11232 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3430,7 +3425,6 @@ POSTHOOK: Input: default@ice_parquet_bigint # col_name data_type comment datecol date intcol int -pcol bigint # Partition Information # col_name data_type comment @@ -3578,7 +3572,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3695,7 +3689,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3812,7 +3806,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('45.789') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: 
COMPLETE Group By Operator @@ -3935,22 +3929,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3961,14 +3955,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data 
size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4045,22 +4039,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), 
compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.9166667 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -4071,14 +4065,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: 
binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 11256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4124,7 +4118,6 @@ POSTHOOK: Input: default@ice_parquet_double # col_name data_type comment datecol date intcol int -pcol double # Partition Information # col_name data_type comment @@ -4272,7 +4265,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4389,7 +4382,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic 
stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4506,7 +4499,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '45.789' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4629,7 +4622,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -4739,7 +4732,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -4818,7 +4811,6 @@ POSTHOOK: Input: default@ice_parquet_decimal # col_name data_type comment datecol date intcol int -pcol decimal(10,6) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out index fcd1c17b24ed..02cd01ac06ee 100644 --- 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out @@ -64,7 +64,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-05' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -181,7 +181,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -298,7 +298,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -421,7 +421,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: 
COMPLETE Column stats: NONE Group By Operator @@ -531,7 +531,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -610,7 +610,6 @@ POSTHOOK: Input: default@ice_parquet_date_transform_year # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -760,8 +759,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -877,8 +876,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), 
max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -994,8 +993,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1117,8 +1116,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1227,8 +1226,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + 
expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1305,7 +1304,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_month # col_name data_type comment bigintcol bigint -pcol date intcol int # Partition Information @@ -1456,8 +1454,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1573,8 +1571,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), 
count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1690,8 +1688,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1813,8 +1811,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1923,8 +1921,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 
(type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2000,7 +1998,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_day POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_day # col_name data_type comment -pcol date bigintcol bigint intcol int @@ -2152,8 +2149,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2269,8 +2266,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhyuitogh' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: 
max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2378,8 +2375,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhuiyoprj' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2455,7 +2452,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_truncate POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_truncate # col_name data_type comment -pcol string bigintcol bigint intcol int @@ -2591,8 +2587,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), 
min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2708,8 +2704,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2817,8 +2813,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2894,7 +2890,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_bucket POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_bucket # col_name data_type comment -pcol string bigintcol bigint intcol int @@ -3123,7 +3118,6 @@ POSTHOOK: query: describe formatted ice_parquet_decimal_transform_bucket POSTHOOK: type: DESCTABLE 
POSTHOOK: Input: default@ice_parquet_decimal_transform_bucket # col_name data_type comment -pcol decimal(38,0) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out index de49a0d3b366..9297d001d5be 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out @@ -85,7 +85,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.testice1000 Select Operator - expressions: _col0 (type: int), _col1 (type: string) + expressions: _col0 (type: int), 'rtyuiy' (type: string) outputColumnNames: a, b Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -162,7 +162,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testice1000 # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out index 063b6389863e..c8251dd3d219 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out @@ -71,22 +71,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE 
Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -97,14 +97,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: 
bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,22 +182,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - 
Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -208,14 +208,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, 
_col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -261,7 +261,6 @@ POSTHOOK: Input: default@ice_parquet_int # col_name data_type comment strcol string intcol int -pcol int # Partition Information # col_name data_type comment @@ -389,7 +388,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -642,7 +641,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -765,7 +764,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 
(type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'CA' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -888,7 +887,7 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'TX' (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1013,7 +1012,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1138,7 +1137,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 
(type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1232,8 +1231,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -1405,7 +1402,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1526,22 +1523,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.75 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce 
partition columns: _col0 (type: date) - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -1552,14 +1549,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 4 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1636,22 +1633,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -1662,14 
+1659,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1715,7 +1712,6 @@ POSTHOOK: Input: default@ice_parquet_date # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -1873,7 +1869,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select 
Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1994,22 +1990,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.75 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), 
_col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2020,14 +2016,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2104,22 +2100,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) + expressions: _col0 (type: 
date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2130,14 +2126,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: 
bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2183,7 +2179,6 @@ POSTHOOK: Input: default@ice_parquet_bigint # col_name data_type comment datecol date intcol int -pcol bigint # Partition Information # col_name data_type comment @@ -2341,7 +2336,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2462,22 +2457,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: 
date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.75 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2488,14 +2483,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 
(type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3752 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 3752 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2572,22 +2567,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) + expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 
0.8333333 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2598,14 +2593,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: 
struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2651,7 +2646,6 @@ POSTHOOK: Input: default@ice_parquet_double # col_name data_type comment datecol date intcol int -pcol double # Partition Information # col_name data_type comment @@ -2809,7 +2803,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2930,7 +2924,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 240 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -3040,7 +3034,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 
(type: int), _col2 (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -3119,7 +3113,6 @@ POSTHOOK: Input: default@ice_parquet_decimal # col_name data_type comment datecol date intcol int -pcol decimal(10,6) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out index 12745b17f098..5c1125df2fab 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out @@ -64,7 +64,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-05' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -181,7 +181,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -298,7 +298,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: 
default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -417,7 +417,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -527,7 +527,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -606,7 +606,6 @@ POSTHOOK: Input: default@ice_parquet_date_transform_year # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -734,8 +733,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -851,8 +850,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -968,8 +967,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1087,8 +1086,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - 
outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1197,8 +1196,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1275,7 +1274,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_month # col_name data_type comment bigintcol bigint -pcol date intcol int # Partition Information @@ -1404,8 +1402,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1521,8 +1519,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1638,8 +1636,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1761,8 +1759,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - 
outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1871,8 +1869,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1948,7 +1946,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_day POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_day # col_name data_type comment -pcol date bigintcol bigint intcol int @@ -2100,8 +2097,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: 
COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2217,8 +2214,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhyuitogh' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2326,8 +2323,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), 'gfhuiyoprj' (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2403,7 +2400,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_truncate POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
default@ice_parquet_date_transform_truncate # col_name data_type comment -pcol string bigintcol bigint intcol int diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out index a364be2dedf7..bba70b438c06 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out @@ -35,31 +35,31 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 2 vectorized - File Output Operator [FS_18] + File Output Operator [FS_20] table:{"name:":"default.ice_parquet"} - Select Operator [SEL_17] + Select Operator [SEL_19] Output:["_col0","_col1","_col2","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_13] + SHUFFLE [RS_15] PartitionCols:_col2 - Select Operator [SEL_12] (rows=77 width=187) + Select Operator [SEL_14] (rows=77 width=187) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=77 width=187) default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"] Reducer 3 vectorized - File Output Operator [FS_21] - Select Operator [SEL_20] (rows=38 width=187) + File Output Operator [FS_23] + Select Operator [SEL_22] (rows=38 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Group By Operator [GBY_19] (rows=38 width=187) + Group By Operator [GBY_21] (rows=38 width=187) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_16] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=77 width=187) + Group By Operator [GBY_17] (rows=77 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"],keys:pcol - Select Operator [SEL_14] (rows=77 width=187) + Select Operator [SEL_16] (rows=77 width=187) Output:["strcol","intcol","pcol"] - Please refer to the previous Select Operator [SEL_12] + Please refer to the previous Select Operator [SEL_14] PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE `ice_parquet` PREHOOK: type: QUERY @@ -90,31 +90,31 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 2 vectorized - File Output Operator [FS_18] + File Output Operator [FS_20] table:{"name:":"default.ice_parquet"} - Select Operator [SEL_17] + Select Operator [SEL_19] Output:["_col0","_col1","_col2","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_13] + SHUFFLE [RS_15] PartitionCols:_col2 - Select Operator [SEL_12] (rows=77/6 width=187) + Select Operator [SEL_14] (rows=77/6 width=187) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=77/6 width=187) 
default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"] Reducer 3 vectorized - File Output Operator [FS_21] - Select Operator [SEL_20] (rows=38/3 width=187) + File Output Operator [FS_23] + Select Operator [SEL_22] (rows=38/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Group By Operator [GBY_19] (rows=38/3 width=187) + Group By Operator [GBY_21] (rows=38/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_16] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=77/3 width=187) + Group By Operator [GBY_17] (rows=77/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"],keys:pcol - Select Operator [SEL_14] (rows=77/6 width=187) + Select Operator [SEL_16] (rows=77/6 width=187) Output:["strcol","intcol","pcol"] - Please refer to the previous Select Operator [SEL_12] + Please refer to the previous Select Operator [SEL_14] PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE 
INTO TABLE ice_parquet PREHOOK: type: QUERY diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index f8dfb22e5fa1..530185c19150 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -113,7 +113,7 @@ Stage-0 File Output Operator [FS_10] Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_25] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -177,7 +177,7 @@ Stage-0 File Output Operator [FS_14] Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_39] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] @@ -247,7 +247,7 @@ Stage-0 File Output Operator [FS_10] Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_25] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -311,7 +311,7 @@ Stage-0 File Output Operator [FS_14] Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_39] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, 
_col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] @@ -378,23 +378,23 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) - BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_32] (rows=3 width=336) + BucketMapJoin:true,Conds:SEL_31._col0, _col1=RS_29._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_49] + MULTICAST [RS_29] PartitionCols:_col1 - Select Operator [SEL_48] (rows=3 width=168) + Select Operator [SEL_28] (rows=3 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=3 width=168) + Filter Operator [FIL_27] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_51] (rows=10 width=168) + <-Select Operator [SEL_31] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=10 width=260) + Filter Operator [FIL_30] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:7,Grouping Partition Columns:["decimal_col"],Output:["date_col","string_col","decimal_col"] @@ -442,30 +442,30 @@ Stage-0 limit:-1 Stage-1 Map 1 
vectorized, llap - File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + File Output Operator [FS_51] + Select Operator [SEL_50] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) - BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_49] (rows=3 width=336) + BucketMapJoin:true,Conds:SEL_48._col0, _col1=RS_46._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_56] + MULTICAST [RS_46] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_45] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_54] + SHUFFLE [RS_44] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_43] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col - Select Operator [SEL_52] (rows=3 width=168) + Select Operator [SEL_42] (rows=3 width=168) Output:["date_col","decimal_col"] - Filter Operator [FIL_51] (rows=3 width=168) + Filter Operator [FIL_41] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_58] (rows=10 width=168) + <-Select Operator [SEL_48] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_57] (rows=10 width=260) + Filter Operator [FIL_47] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan 
[TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:7,Grouping Partition Columns:["decimal_col"],Output:["date_col","string_col","decimal_col"] @@ -512,23 +512,23 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) - Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_32] (rows=3 width=336) + Conds:SEL_31._col0, _col1=RS_29._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_49] + BROADCAST [RS_29] PartitionCols:_col0, _col1 - Select Operator [SEL_48] (rows=3 width=168) + Select Operator [SEL_28] (rows=3 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=3 width=168) + Filter Operator [FIL_27] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_51] (rows=10 width=168) + <-Select Operator [SEL_31] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=10 width=260) + Filter Operator [FIL_30] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","string_col","decimal_col"] @@ -576,30 +576,30 @@ Stage-0 limit:-1 Stage-1 Map 1 
vectorized, llap - File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + File Output Operator [FS_51] + Select Operator [SEL_50] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) - Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Conds:SEL_48._col0, _col1=RS_46._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_56] + BROADCAST [RS_46] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_45] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_54] + SHUFFLE [RS_44] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_43] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col - Select Operator [SEL_52] (rows=3 width=168) + Select Operator [SEL_42] (rows=3 width=168) Output:["date_col","decimal_col"] - Filter Operator [FIL_51] (rows=3 width=168) + Filter Operator [FIL_41] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_58] (rows=10 width=168) + <-Select Operator [SEL_48] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_57] (rows=10 width=260) + Filter Operator [FIL_47] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) 
default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","string_col","decimal_col"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out index 413fd81aa953..7ce866f2b03c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out @@ -251,39 +251,39 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result"} - Select Operator [SEL_41] (rows=785 width=366) + Select Operator [SEL_31] (rows=785 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=785 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=785 width=186) + 
BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n0,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=1000 width=95) + <-Select Operator [SEL_29] (rows=1000 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=1000 width=95) + Filter Operator [FIL_28] (rows=1000 width=95) predicate:key is not null TableScan [TS_3] (rows=1000 width=95) default@srcbucket_mapjoin_part_n0,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=785 width=366) + Select Operator [SEL_33] (rows=785 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -415,39 +415,39 @@ Stage-3 Dependency Collection{} Stage-1 
Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result"} - Select Operator [SEL_41] (rows=809 width=366) + Select Operator [SEL_31] (rows=809 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=809 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=809 width=186) + BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n0,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=524 width=95) + <-Select Operator [SEL_29] 
(rows=524 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=524 width=95) + Filter Operator [FIL_28] (rows=524 width=95) predicate:key is not null TableScan [TS_3] (rows=524 width=95) default@srcbucket_mapjoin_part_2,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=809 width=366) + Select Operator [SEL_33] (rows=809 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out index bd82280c9c07..f21a3c84da24 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out @@ -122,27 +122,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] 
<-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_22] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_21] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n4,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_25] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_24] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1_n1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] @@ -196,27 +196,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_35] + Group By Operator [GBY_34] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - 
BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_31] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_27] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_26] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n4,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_30] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_29] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1_n1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out index 4ddd445ecbfb..dd93fcf31851 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out @@ -235,27 +235,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) 
Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=1797 width=8) - Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=1797 width=8) + Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=738 width=4) + Select Operator [SEL_22] (rows=738 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=738 width=89) + Filter Operator [FIL_21] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_3] (rows=738 width=89) default@srcbucket_mapjoin_part_2_n6,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=738 width=4) + <-Select Operator [SEL_25] (rows=738 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=738 width=89) + Filter Operator [FIL_24] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_0] (rows=738 width=89) default@srcbucket_mapjoin_part_1_n2,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] @@ -302,33 +302,33 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_49] - Group By Operator [GBY_48] (rows=1 width=8) + File Output Operator [FS_39] + Group By Operator [GBY_38] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_46] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_37] + Group By Operator [GBY_36] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_45] (rows=1797 width=8) - Conds:SEL_44._col0, _col1=RS_39._col0, _col1(Inner) + Map Join Operator [MAPJOIN_35] (rows=1797 width=8) + Conds:SEL_34._col0, _col1=RS_29._col0, _col1(Inner) <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_39] + BROADCAST [RS_29] PartitionCols:_col0, _col1 - Select Operator [SEL_38] (rows=738 width=89) + Select Operator 
[SEL_28] (rows=738 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_37] (rows=738 width=89) + Filter Operator [FIL_27] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_3] (rows=738 width=89) default@srcbucket_mapjoin_part_2_n6,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - Dynamic Partitioning Event Operator [EVENT_42] (rows=2 width=85) - Group By Operator [GBY_41] (rows=2 width=85) + Dynamic Partitioning Event Operator [EVENT_32] (rows=2 width=85) + Group By Operator [GBY_31] (rows=2 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_40] (rows=738 width=85) + Select Operator [SEL_30] (rows=738 width=85) Output:["_col0"] - Please refer to the previous Select Operator [SEL_38] - <-Select Operator [SEL_44] (rows=738 width=89) + Please refer to the previous Select Operator [SEL_28] + <-Select Operator [SEL_34] (rows=738 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=738 width=89) + Filter Operator [FIL_33] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_0] (rows=738 width=89) default@srcbucket_mapjoin_part_1_n2,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out index 4b92f571e020..8ff5e300aa46 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out @@ -174,27 +174,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_35] + Group By Operator [GBY_34] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 
width=8) + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_31] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_27] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_26] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n0,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_30] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_29] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] @@ -241,27 +241,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=372 width=8) + 
BucketMapJoin:true,Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_22] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_21] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_3,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_25] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_24] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out index 11477c98ecc5..024fde7f1576 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out @@ -141,37 +141,37 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result_n3"} - Select Operator [SEL_41] (rows=809 width=366) + Select Operator [SEL_31] (rows=809 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=809 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=809 width=186) + BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n5,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=524 width=95) + <-Select Operator [SEL_29] (rows=524 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=524 width=95) + Filter Operator [FIL_28] (rows=524 width=95) predicate:key is not null TableScan [TS_3] (rows=524 width=95) default@srcbucket_mapjoin_part_2_n7,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + 
PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=809 width=366) + Select Operator [SEL_33] (rows=809 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out index e32e34094e80..5c66fb5d0924 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out @@ -49,29 +49,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_61] - Limit [LIM_60] (rows=20 width=447) + File Output Operator [FS_46] + Limit [LIM_45] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=791 width=447) + Select Operator [SEL_44] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=791 width=447) + SHUFFLE [RS_43] + Top N Key Operator [TNK_42] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_41] (rows=791 width=447) + 
BucketMapJoin:true,Conds:SEL_40._col0, _col1=RS_38._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_53] + MULTICAST [RS_38] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=500 width=178) + Select Operator [SEL_37] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=500 width=178) + Filter Operator [FIL_36] (rows=500 width=178) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=500 width=269) + <-Select Operator [SEL_40] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=500 width=269) + Filter Operator [FIL_39] (rows=500 width=269) predicate:(key1 is not null and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -147,29 +147,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_61] - Limit [LIM_60] (rows=20 width=447) + File Output Operator [FS_46] + Limit [LIM_45] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=473 width=447) + Select Operator [SEL_44] (rows=473 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=473 width=447) + SHUFFLE [RS_43] + Top N Key Operator [TNK_42] (rows=473 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=473 width=447) - BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_41] (rows=473 width=447) + BucketMapJoin:true,Conds:SEL_40._col0, _col1=RS_38._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 
[CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_53] + MULTICAST [RS_38] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=387 width=178) + Select Operator [SEL_37] (rows=387 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=387 width=178) + Filter Operator [FIL_36] (rows=387 width=178) predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=387 width=269) + <-Select Operator [SEL_40] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=387 width=269) + Filter Operator [FIL_39] (rows=387 width=269) predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -245,29 +245,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=791 width=447) + Select Operator [SEL_34] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=791 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST 
[RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=500 width=178) + Select Operator [SEL_27] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500 width=178) + Filter Operator [FIL_26] (rows=500 width=178) predicate:key is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=500 width=269) + <-Select Operator [SEL_30] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=500 width=269) + Filter Operator [FIL_29] (rows=500 width=269) predicate:key1 is not null TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -343,29 +343,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=473 width=447) + Select Operator [SEL_34] (rows=473 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=473 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=473 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=473 width=447) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=473 width=447) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=178) + Select Operator [SEL_27] (rows=387 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=387 width=178) + Filter Operator [FIL_26] 
(rows=387 width=178) predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100'))) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=387 width=269) + <-Select Operator [SEL_30] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=387 width=269) + Filter Operator [FIL_29] (rows=387 width=269) predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100'))) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -443,29 +443,29 @@ Stage-0 limit:20 Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=612 width=447) + Select Operator [SEL_34] (rows=612 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=612 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=612 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=612 width=447) - Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=612 width=447) + Conds:RS_28._col0=SEL_30._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=269) + Select Operator [SEL_27] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=387 width=269) + Filter Operator [FIL_26] (rows=387 width=269) predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null) TableScan 
[TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_35] (rows=500 width=178) + <-Select Operator [SEL_30] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=500 width=178) + Filter Operator [FIL_29] (rows=500 width=178) predicate:key is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -541,29 +541,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=814 width=447) + Select Operator [SEL_34] (rows=814 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=814 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=814 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=814 width=447) - Conds:SEL_35._col1=RS_33._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=814 width=447) + Conds:SEL_30._col1=RS_28._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_28] PartitionCols:_col1 - Select Operator [SEL_32] (rows=500 width=178) + Select Operator [SEL_27] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500 width=178) + Filter Operator [FIL_26] (rows=500 width=178) predicate:value is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=500 width=269) + <-Select Operator [SEL_30] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=500 width=269) + Filter Operator [FIL_29] (rows=500 width=269) 
predicate:key2 is not null TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] @@ -637,29 +637,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_66] - Limit [LIM_65] (rows=20 width=447) + File Output Operator [FS_51] + Limit [LIM_50] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_64] (rows=791 width=447) + Select Operator [SEL_49] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_63] - Top N Key Operator [TNK_62] (rows=791 width=447) + SHUFFLE [RS_48] + Top N Key Operator [TNK_47] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_61] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_60._col0, _col1, _col2=RS_58._col0, _col1, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_46] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_45._col0, _col1, _col2=RS_43._col0, _col1, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_58] + MULTICAST [RS_43] PartitionCols:_col0, _col1 - Select Operator [SEL_57] (rows=500 width=178) + Select Operator [SEL_42] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_56] (rows=500 width=178) + Filter Operator [FIL_41] (rows=500 width=178) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_60] (rows=500 width=269) + <-Select Operator [SEL_45] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_59] (rows=500 width=269) + Filter Operator [FIL_44] (rows=500 width=269) predicate:(key1 is not null and key2 is not null and value is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping 
Partition Columns:["key1","key2"],Output:["key1","key2","value"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out index e7a677f06282..148f2d89ee20 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out @@ -147,25 +147,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=16 width=8) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=16 width=8) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_35] - Map Join Operator [MAPJOIN_34] (rows=16 width=8) - BucketMapJoin:true,Conds:SEL_33._col0=RS_31._col0(Inner),Output:["_col0","_col1"] + SHUFFLE [RS_30] + Map Join Operator [MAPJOIN_29] (rows=16 width=8) + BucketMapJoin:true,Conds:SEL_28._col0=RS_26._col0(Inner),Output:["_col0","_col1"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_31] + MULTICAST [RS_26] PartitionCols:_col0 - Select Operator [SEL_30] (rows=4 width=4) + Select Operator [SEL_25] (rows=4 width=4) Output:["_col0"] - Filter Operator [FIL_29] (rows=4 width=4) + Filter Operator [FIL_24] (rows=4 width=4) predicate:key1 is not null TableScan [TS_3] (rows=6 width=3) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1"] - <-Select Operator [SEL_33] (rows=16 width=8) + <-Select Operator [SEL_28] (rows=16 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=16 width=8) + Filter Operator [FIL_27] (rows=16 width=8) predicate:key1 is not null TableScan [TS_0] (rows=16 width=8) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","id"] @@ -231,25 +231,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_37] - 
Select Operator [SEL_36] (rows=16 width=92) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=16 width=92) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_35] - Map Join Operator [MAPJOIN_34] (rows=16 width=92) - BucketMapJoin:true,Conds:SEL_33._col0=RS_31._col0(Inner),Output:["_col0","_col1"] + SHUFFLE [RS_30] + Map Join Operator [MAPJOIN_29] (rows=16 width=92) + BucketMapJoin:true,Conds:SEL_28._col0=RS_26._col0(Inner),Output:["_col0","_col1"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_31] + MULTICAST [RS_26] PartitionCols:_col0 - Select Operator [SEL_30] (rows=4 width=87) + Select Operator [SEL_25] (rows=4 width=87) Output:["_col0"] - Filter Operator [FIL_29] (rows=4 width=87) + Filter Operator [FIL_24] (rows=4 width=87) predicate:key2 is not null TableScan [TS_3] (rows=6 width=72) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key2"] - <-Select Operator [SEL_33] (rows=16 width=92) + <-Select Operator [SEL_28] (rows=16 width=92) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=16 width=92) + Filter Operator [FIL_27] (rows=16 width=92) predicate:key2 is not null TableScan [TS_0] (rows=16 width=92) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key2"],Output:["key2","id"] @@ -399,25 +399,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_57] - Select Operator [SEL_56] (rows=12 width=96) + File Output Operator [FS_42] + Select Operator [SEL_41] (rows=12 width=96) Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - Map Join Operator [MAPJOIN_54] (rows=12 width=96) - BucketMapJoin:true,Conds:SEL_53._col0, _col1=RS_51._col0, _col1(Inner),Output:["_col0","_col1","_col2"] + SHUFFLE [RS_40] + Map Join Operator [MAPJOIN_39] (rows=12 width=96) + BucketMapJoin:true,Conds:SEL_38._col0, _col1=RS_36._col0, _col1(Inner),Output:["_col0","_col1","_col2"] <-Map 3 [CUSTOM_EDGE] 
vectorized, llap - MULTICAST [RS_51] + MULTICAST [RS_36] PartitionCols:_col0, _col1 - Select Operator [SEL_50] (rows=3 width=91) + Select Operator [SEL_35] (rows=3 width=91) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=3 width=91) + Filter Operator [FIL_34] (rows=3 width=91) predicate:(key1 is not null and key2 is not null) TableScan [TS_3] (rows=6 width=75) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2"] - <-Select Operator [SEL_53] (rows=12 width=96) + <-Select Operator [SEL_38] (rows=12 width=96) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_52] (rows=12 width=96) + Filter Operator [FIL_37] (rows=12 width=96) predicate:(key1 is not null and key2 is not null) TableScan [TS_0] (rows=12 width=96) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:32,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","id"] @@ -479,25 +479,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_57] - Select Operator [SEL_56] (rows=9 width=78) + File Output Operator [FS_47] + Select Operator [SEL_46] (rows=9 width=78) Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - Map Join Operator [MAPJOIN_54] (rows=9 width=78) - BucketMapJoin:true,Conds:SEL_53._col0, _col1=RS_51._col0, _col1(Inner),Output:["_col0","_col1","_col2"] + SHUFFLE [RS_45] + Map Join Operator [MAPJOIN_44] (rows=9 width=78) + BucketMapJoin:true,Conds:SEL_43._col0, _col1=RS_41._col0, _col1(Inner),Output:["_col0","_col1","_col2"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_51] + MULTICAST [RS_41] PartitionCols:_col0 - Select Operator [SEL_50] (rows=3 width=93) + Select Operator [SEL_40] (rows=3 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=3 width=93) + Filter Operator [FIL_39] (rows=3 width=93) predicate:(key1 is not null and value is not null) TableScan [TS_3] (rows=6 width=77) 
default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","value"] - <-Select Operator [SEL_53] (rows=12 width=83) + <-Select Operator [SEL_43] (rows=12 width=83) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_52] (rows=12 width=83) + Filter Operator [FIL_42] (rows=12 width=83) predicate:(value is not null and key1 is not null) TableScan [TS_0] (rows=16 width=81) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","value","id"] @@ -559,25 +559,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_67] - Select Operator [SEL_66] (rows=5 width=150) + File Output Operator [FS_52] + Select Operator [SEL_51] (rows=5 width=150) Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_65] - Map Join Operator [MAPJOIN_64] (rows=5 width=150) - BucketMapJoin:true,Conds:SEL_63._col0, _col1, _col2=RS_61._col0, _col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3"] + SHUFFLE [RS_50] + Map Join Operator [MAPJOIN_49] (rows=5 width=150) + BucketMapJoin:true,Conds:SEL_48._col0, _col1, _col2=RS_46._col0, _col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_61] + MULTICAST [RS_46] PartitionCols:_col0, _col1 - Select Operator [SEL_60] (rows=2 width=180) + Select Operator [SEL_45] (rows=2 width=180) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_59] (rows=2 width=180) + Filter Operator [FIL_44] (rows=2 width=180) predicate:(key1 is not null and key2 is not null and value is not null) TableScan [TS_3] (rows=6 width=150) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_63] (rows=8 width=164) + <-Select Operator [SEL_48] (rows=8 width=164) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_62] (rows=8 width=164) + Filter Operator [FIL_47] (rows=8 width=164) predicate:(value is not null and key1 is 
not null and key2 is not null) TableScan [TS_0] (rows=12 width=164) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:32,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value","id"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out index b4d8dcd592d5..c4a228888b3f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out @@ -135,7 +135,7 @@ POSTHOOK: query: alter table part_tbl set tblproperties ('compactor.threshold.ta POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: ice_comp_all@part_tbl POSTHOOK: Output: ice_comp_all@part_tbl -Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: merge into part_tbl t using (select 1 as id, 'p1_upd' as data, 10 as dept_id) s on t.dept_id = s.dept_id and t.id = s.id diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out index 7ea7605467ca..5d870e2466f1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out @@ -48,7 +48,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat1 # col_name data_type comment c int -b string # Partition Information # col_name data_type comment @@ -140,7 +139,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat2 # col_name data_type comment c int -b string # Partition Information # col_name data_type comment diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out index 770cc967d0bc..45f5be172bd3 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out @@ -47,8 +47,6 @@ POSTHOOK: query: describe formatted mat1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat1 # col_name data_type comment -b string -c int # Partition Information # col_name data_type comment @@ -141,8 +139,6 @@ POSTHOOK: query: describe formatted mat2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat2 # col_name data_type comment -b string -c int # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out index a44d9394025b..3b9b57aa94bf 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out @@ -82,8 +82,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -174,8 +172,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out index 24fff78df880..c3c309b656aa 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out @@ -37,11 +37,6 @@ equality_delete_record_count bigint Count of records in equality d equality_delete_file_count int Count of equality delete files last_updated_at timestamp 
with local time zone Commit time of snapshot that last updated this partition last_updated_snapshot_id bigint Id of snapshot that last updated this partition - -# Partition Information -# col_name data_type comment -d_part int Transform: identity -e_part int Transform: identity PREHOOK: query: select * from default.ice1.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice1 diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out index be765c27120a..61ddfbc3baa2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out @@ -79,7 +79,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_truncate # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -187,7 +186,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_truncate # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out index 5d4e328faf21..750ea7e0ba6d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out @@ -287,14 +287,14 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[293][tables = 
[$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[295][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1061,14 +1061,14 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: 
Shuffle Join MERGEJOIN[284][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[293][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[295][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c 
> 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1116,7 +1116,7 @@ POSTHOOK: query: insert into tbl_ice_other values (10, 'ten'), (333, 'hundred') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_other -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain update tbl_ice set b='Changed forever' where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1138,21 +1138,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 3 <- Reducer 14 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) + Reducer 7 <- Map 15 
(SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 10 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: tbl_ice @@ -1221,97 +1241,20 @@ STAGE PLANS: Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - 
Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 
17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 12 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1327,7 +1270,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1373,20 +1316,182 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), 
_col4 (type: bigint), _col6 (type: string) Reducer 14 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (row_number_window_0 = 1) (type: boolean) + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), 
_col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 
Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'Changed forever' (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.tbl_ice + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 2 + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 7 Reduce Operator Tree: Merge Join Operator condition map: @@ -1401,7 +1506,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col9 (type: boolean) - Reducer 3 + Reducer 8 Reduce Operator Tree: Merge Join Operator condition 
map: @@ -1429,7 +1534,7 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: string) Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: bigint), _col7 (type: string) - Reducer 4 + Reducer 9 Reduce Operator Tree: Merge Join Operator condition map: @@ -1450,101 +1555,8 @@ STAGE PLANS: Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.tbl_ice - Reducer 7 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data 
size: 79 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'Changed forever' (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 8 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col5 - raw input shape: - window functions: - window function definition - alias: row_number_window_0 - name: row_number - window function: GenericUDAFRowNumberEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Union 5 - Vertex: Union 5 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection @@ -1563,7 +1575,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle 
Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: update tbl_ice set b='Changed forever' where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1615,7 +1627,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -2074,7 +2086,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out index 1f837c4b586e..c9938d7a2236 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table 
POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out index 84de34cb28b8..e7a6c6090bcf 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out index ecc74a61c6c2..4d8f0d994b14 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out index 31676dd56a8c..6b7d2ee05aa4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out @@ -899,66 +899,66 @@ Stage-6 Dependency Collection{} Stage-4 Reducer 2 vectorized - File Output Operator [FS_64] + File Output Operator [FS_60] table:{"name:":"default.store_sales"} - Select Operator [SEL_63] + Select Operator [SEL_59] 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col3","iceberg_bucket(_col2, 3)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_59] + SHUFFLE [RS_55] PartitionCols:_col3, iceberg_bucket(_col2, 3) - Select Operator [SEL_55] (rows=1 width=#Masked#) + Select Operator [SEL_51] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - Filter Operator [FIL_51] (rows=1 width=#Masked#) + Filter Operator [FIL_47] (rows=1 width=#Masked#) predicate:((_col24 = _col34) and (_col47 = _col37) and (_col36 = 2451181) and (floor((_col34 / 1000)) * 1000) BETWEEN 1000 AND 2000 and (_col30 < 0) and _col33 is null) - Select Operator [SEL_49] (rows=5 width=#Masked#) + Select Operator [SEL_45] (rows=5 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49"] - Map Join Operator [MAPJOIN_48] (rows=5 width=#Masked#) - BucketMapJoin:true,Conds:SEL_47._col2, _col1=RS_46._col8, _col7(Left 
Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51"] + Map Join Operator [MAPJOIN_44] (rows=5 width=#Masked#) + BucketMapJoin:true,Conds:SEL_43._col2, _col1=RS_42._col8, _col7(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51"] <-Map 5 [CUSTOM_EDGE] vectorized - MULTICAST [RS_46] + MULTICAST [RS_42] PartitionCols:_col7 - Select Operator [SEL_45] (rows=2 width=#Masked#) + Select Operator [SEL_41] (rows=2 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27"] - Filter Operator [FIL_44] (rows=2 width=#Masked#) + Filter Operator [FIL_40] (rows=2 width=#Masked#) predicate:((ss_sold_date_sk = 2451181) and ss_item_sk is not null and ss_customer_sk is not null) TableScan [TS_2] (rows=2 width=#Masked#) 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_list_price","ss_sales_price","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price","ss_ext_tax","ss_coupon_amt","ss_net_paid","ss_net_paid_inc_tax","ss_net_profit"] - <-Select Operator [SEL_47] (rows=5 width=#Masked#) + <-Select Operator [SEL_43] (rows=5 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] TableScan [TS_0] (rows=5 width=#Masked#) default@ssv,s,Tbl:COMPLETE,Col:NONE,Grouping Num Buckets:3,Grouping Partition Columns:["ss_item_sk2"],Output:["ss_sold_time_sk","ss_item_sk2","ss_customer_sk2","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_list_price","ss_sales_price","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price","ss_ext_tax","ss_coupon_amt","ss_net_paid","ss_net_paid_inc_tax","ss_net_profit"] Reducer 3 vectorized - File Output Operator [FS_66] + File Output Operator [FS_62] table:{"name:":"default.store_sales"} - Select Operator [SEL_65] + Select Operator [SEL_61] Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col3","iceberg_bucket(_col2, 3)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_60] + SHUFFLE [RS_56] PartitionCols:_col3, iceberg_bucket(_col2, 3) - Select Operator [SEL_56] (rows=1 width=#Masked#) + Select Operator [SEL_52] (rows=1 width=#Masked#) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - Filter Operator [FIL_52] (rows=1 width=#Masked#) + Filter Operator [FIL_48] (rows=1 width=#Masked#) predicate:(_col24 is null and _col47 is null and _col36 is null) - Please refer to the previous Select Operator [SEL_49] + Please refer to the previous Select Operator [SEL_45] Reducer 4 vectorized - File Output Operator [FS_70] + File Output Operator [FS_66] table:{"name:":"default.merge_tmp_table"} - Select Operator [SEL_69] (rows=1 width=#Masked#) + Select Operator [SEL_65] (rows=1 width=#Masked#) Output:["_col0"] - Filter Operator [FIL_68] (rows=1 width=#Masked#) + Filter Operator [FIL_64] (rows=1 width=#Masked#) predicate:(_col4 > 1L) - Group By Operator [GBY_67] (rows=1 width=#Masked#) + Group By Operator [GBY_63] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_62] + SHUFFLE [RS_58] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_61] (rows=1 width=#Masked#) + Group By Operator [GBY_57] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count()"],keys:_col4, _col40, _col6, _col45 - Select Operator [SEL_57] (rows=1 width=#Masked#) + Select Operator [SEL_53] (rows=1 width=#Masked#) Output:["_col4","_col6","_col40","_col45"] - Filter Operator [FIL_53] (rows=1 width=#Masked#) + Filter Operator [FIL_49] (rows=1 width=#Masked#) predicate:((_col24 = _col34) and (_col47 = _col37) and (_col36 = 2451181) and (floor((_col34 / 1000)) * 1000) BETWEEN 1000 AND 2000 and (_col30 < 0)) - Please refer to the previous Select Operator [SEL_49] + Please refer to the previous Select Operator [SEL_45] Stage-7 Stats Work{} Stage-3 diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out index cd0ce562a725..36b5b988c492 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out @@ -757,10 +757,10 @@ PREHOOK: Input: default@tbl_ice_mixed_parted POSTHOOK: query: describe tbl_ice_mixed_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_mixed_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out index fdf2679f5b2a..0b7260d84206 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out @@ -526,10 +526,10 @@ PREHOOK: Input: default@tbl_ice_orc_parted POSTHOOK: query: describe tbl_ice_orc_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_orc_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out index acc7794e12ce..675262df24d7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out @@ -430,10 +430,10 @@ PREHOOK: Input: default@tbl_ice_parquet_parted POSTHOOK: query: describe 
tbl_ice_parquet_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_parquet_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java index b3d37fb9c40a..3aadf916a470 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java @@ -161,7 +161,7 @@ private void createTableNonReplaceMode(Table tbl) throws HiveException { if (desc.isCTAS()) { Table createdTable = context.getDb().getTable(tbl.getDbName(), tbl.getTableName()); DataContainer dc = new DataContainer(createdTable.getTTable()); - context.getQueryState().getLineageState().setLineage(createdTable.getPath(), dc, createdTable.getCols()); + context.getQueryState().getLineageState().setLineage(createdTable.getPath(), dc, createdTable.getStorageSchemaCols()); // We did not create the table before moving the data files for a non-partitioned table i.e // we used load file instead of load table (see SemanticAnalyzer#getFileSinkPlan() for diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index 9196a3441200..3487dc443db1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -909,7 +909,7 @@ private String getExternal(Table table) { private String getColumns(Table table) { List columnDescs = new ArrayList<>(); - for (FieldSchema column : table.getCols()) { + for (FieldSchema column : table.getStorageSchemaCols()) { String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())); String columnDesc = " " + 
unparseIdentifier(column.getName()) + " " + columnType; if (column.getComment() != null) { From 18243b1d0d576b18091900c67ae157029972bb2a Mon Sep 17 00:00:00 2001 From: Ramit Gupta Date: Wed, 27 May 2026 02:49:09 +0530 Subject: [PATCH 19/20] updated stats autogather and test output --- .../iceberg_insert_into_partition.q.out | 160 +++++++++--------- ...erg_insert_into_partition_transforms.q.out | 42 ++--- ...insert_into_partition_with_evolution.q.out | 2 +- .../iceberg_insert_overwrite_partition.q.out | 136 +++++++-------- ...nsert_overwrite_partition_transforms.q.out | 36 ++-- .../iceberg_row_lineage_compactions.q.out | 2 +- .../parse/ColumnStatsAutoGatherContext.java | 22 ++- 7 files changed, 208 insertions(+), 192 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out index 86a8569e9ad9..97989584ccf7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out @@ -62,7 +62,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -179,7 +179,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('2') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By 
Operator @@ -289,22 +289,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('3') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -315,14 +315,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -400,22 +400,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('04') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), 
count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -426,14 +426,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), 
COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -602,7 +602,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -720,7 +720,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -838,7 +838,7 @@ STAGE 
PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'TX' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1085,7 +1085,7 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 12 Data size: 3252 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1208,7 +1208,7 @@ STAGE PLANS: Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'CA' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 24 Data size: 6504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1331,7 +1331,7 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'TX' (type: string) + expressions: _col0 (type: string), 
_col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 48 Data size: 13008 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1456,7 +1456,7 @@ STAGE PLANS: Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 96 Data size: 26208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1581,7 +1581,7 @@ STAGE PLANS: Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 192 Data size: 52416 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2184,7 +2184,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2301,7 +2301,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 
(type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2418,7 +2418,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2541,22 +2541,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 6 
Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2567,14 +2567,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2651,22 +2651,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.9166667 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2677,14 +2677,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: 
mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7872 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 12 Data size: 11808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 11808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2879,7 +2879,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num 
rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2996,7 +2996,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3113,7 +3113,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('45637829068876994') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3236,22 +3236,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('45637829068876994') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3262,14 +3262,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3346,22 +3346,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.9166667 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 
(type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3372,14 +3372,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 12 Data size: 11232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 11232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3572,7 +3572,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: 
default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3689,7 +3689,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3806,7 +3806,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('45.789') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3929,22 +3929,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.4 + minReductionHashAggr: 
0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -3955,14 +3955,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4039,22 +4039,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.9166667 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -4065,14 +4065,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 12 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 12 Data size: 11256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 11256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4265,7 +4265,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4382,7 +4382,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4499,7 +4499,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '45.789' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4622,7 +4622,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator 
@@ -4732,7 +4732,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out index 02cd01ac06ee..f25a8b68fa3a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out @@ -64,7 +64,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-05' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -181,7 +181,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -298,7 +298,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - 
expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -421,7 +421,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -531,7 +531,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -759,7 +759,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -876,7 +876,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 
(type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -993,7 +993,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1116,7 +1116,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1226,7 +1226,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1454,7 +1454,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By 
Operator @@ -1571,7 +1571,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1688,7 +1688,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1811,7 +1811,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1921,7 +1921,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2149,7 +2149,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select 
Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2266,7 +2266,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhyuitogh' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2375,7 +2375,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhuiyoprj' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2587,7 +2587,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2704,7 +2704,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) 
outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2813,7 +2813,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out index 9297d001d5be..f0e686e46014 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out @@ -85,7 +85,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.testice1000 Select Operator - expressions: _col0 (type: int), 'rtyuiy' (type: string) + expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: a, b Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out index c8251dd3d219..2f55ccfbc41d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out @@ -71,22 +71,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - 
expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -97,14 +97,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' 
(type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,22 +182,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_int Select Operator - expressions: _col0 (type: string), _col1 (type: int), UDFToInteger('1') (type: int) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: strcol, intcol, pcol Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(strcol)), avg(COALESCE(length(strcol),0)), count(1), count(strcol), compute_bit_vector_hll(strcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), 
count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: int) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: int), _col11 (type: int), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -208,14 +208,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 2 Data size: 992 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), UDFToLong(_col10) (type: bigint), UDFToLong(_col11) (type: bigint), (_col3 - _col12) 
(type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 1676 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 838 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -388,7 +388,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_string Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), 'CA' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -641,7 +641,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'USA' (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -764,7 +764,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: 
COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'CA' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 4 Data size: 1084 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -887,7 +887,7 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 'TX' (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 6 Data size: 1626 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1012,7 +1012,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 10 Data size: 2730 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1137,7 +1137,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) Select Operator - expressions: _col0 (type: string), _col1 (type: int), 'India' (type: string), _col3 (type: string) + expressions: 
_col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string) outputColumnNames: name, age, country, state Statistics: Num rows: 20 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1402,7 +1402,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1523,22 +1523,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.75 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -1549,14 +1549,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 4 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1633,22 +1633,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: date) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: date), _col11 (type: date), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -1659,14 +1659,14 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3936 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'LONG' (type: string), _col1 (type: bigint), _col2 (type: bigint), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DATE' (type: string), _col10 (type: date), _col11 (type: date), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1869,7 +1869,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group 
By Operator @@ -1990,22 +1990,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('34567890123456787') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.75 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2016,14 +2016,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2432 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 (type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 4 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2100,22 +2100,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_bigint Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToLong('12346577399277578') (type: bigint) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: bigint) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), 
min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: bigint) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2126,14 +2126,14 @@ STAGE PLANS: keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'LONG' (type: string), _col10 
(type: bigint), _col11 (type: bigint), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 936 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2336,7 +2336,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2457,22 +2457,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.14786') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), 
compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.75 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2483,14 +2483,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 4 Data size: 2432 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), 
COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 4 Data size: 3752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 3752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2567,22 +2567,22 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_double Select Operator - expressions: _col0 (type: date), _col1 (type: int), UDFToDouble('3.189') (type: double) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: double) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(datecol), max(datecol), count(1), count(datecol), compute_bit_vector_hll(datecol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol) keys: pcol (type: double) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.8333333 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + Map-reduce 
partition columns: _col0 (type: double) - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary), _col6 (type: int), _col7 (type: int), _col8 (type: bigint), _col9 (type: binary), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: binary) Execution mode: vectorized Reducer 2 @@ -2593,14 +2593,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6 Data size: 3648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'DATE' (type: string), _col1 (type: date), _col2 (type: date), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), 'LONG' (type: string), UDFToLong(_col6) (type: bigint), UDFToLong(_col7) (type: bigint), (_col3 - _col8) (type: bigint), COALESCE(ndv_compute_bit_vector(_col9),0) (type: bigint), _col9 (type: binary), 'DOUBLE' (type: string), _col10 (type: double), _col11 (type: double), (_col3 - _col12) (type: bigint), COALESCE(ndv_compute_bit_vector(_col13),0) (type: bigint), _col13 (type: binary), named_struct('pcol',_col0) (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 5628 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2803,7 +2803,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2924,7 +2924,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.14786' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 4 Data size: 240 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -3034,7 +3034,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_decimal Select Operator - expressions: _col0 (type: date), _col1 (type: int), CAST( '3.189' AS decimal(10,6)) (type: decimal(10,6)) + expressions: _col0 (type: date), _col1 (type: int), _col2 (type: decimal(10,6)) outputColumnNames: datecol, intcol, pcol Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out index 5c1125df2fab..e1764f671867 100644 --- 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out @@ -64,7 +64,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-05' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -181,7 +181,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -298,7 +298,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -417,7 +417,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 24 
Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -527,7 +527,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_year Select Operator - expressions: _col0 (type: bigint), _col1 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -733,7 +733,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -850,7 +850,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -967,7 +967,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1086,7 +1086,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: 
default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1196,7 +1196,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1402,7 +1402,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-31' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1519,7 +1519,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-26' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1636,7 +1636,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-12' AS DATE) (type: date) 
+ expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -1759,7 +1759,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-13' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1869,7 +1869,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), CAST( '1999-12-02' AS DATE) (type: date) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2097,7 +2097,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhutjkgkd' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2214,7 +2214,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhyuitogh' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic 
stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2323,7 +2323,7 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col1 (type: bigint), _col2 (type: int), 'gfhuiyoprj' (type: string) + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out index c4a228888b3f..9659606b1045 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out @@ -135,7 +135,7 @@ POSTHOOK: query: alter table part_tbl set tblproperties ('compactor.threshold.ta POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: ice_comp_all@part_tbl POSTHOOK: Output: ice_comp_all@part_tbl -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: merge into part_tbl t using (select 1 as id, 'p1_upd' as data, 10 as dept_id) s on t.dept_id = s.dept_id and t.id = s.id diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 3ff9d30955d5..ff851577611f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -276,6 +276,15 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator 
entry : partSpec.entrySet()) { + if (entry.getValue() == null) { + dynPartsCount++; + } + } + } + boolean inputRRHasStaticParts = (this.columns.size() + dynPartsCount < columns.size()); // if there is any partition column (in static partition or dynamic // partition or mixed case) int dynamicPartBegin = 0; @@ -284,6 +293,8 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator Date: Wed, 27 May 2026 19:50:45 +0530 Subject: [PATCH 20/20] updated getPartitionKeys --- .../hive/ql/ddl/table/AlterTableUtils.java | 3 +- .../apache/hadoop/hive/ql/metadata/Table.java | 33 ++++++++----------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 7 ++-- .../ql/parse/RewriteSemanticAnalyzer.java | 3 +- .../hive/ql/parse/SemanticAnalyzer.java | 3 +- .../hive/ql/parse/rewrite/MergeRewriter.java | 6 ++-- .../ql/parse/rewrite/SplitUpdateRewriter.java | 6 ++-- 7 files changed, 23 insertions(+), 38 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java index ed1945ba657e..6d543351f21c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java @@ -78,8 +78,7 @@ public static boolean isSchemaEvolutionEnabled(Table table, Configuration conf) } public static boolean isFullPartitionSpec(Table table, Map partitionSpec) { - List partKeys = Objects.requireNonNullElse(table.getPartitionKeys(), new ArrayList<>()); - for (FieldSchema partitionCol : partKeys) { + for (FieldSchema partitionCol : table.getPartitionKeys()) { if (partitionSpec.get(partitionCol.getName()) == null) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 342f846fca2f..54cf9ac80e7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -32,10 +32,8 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; -import java.util.TreeMap; import java.util.stream.Collectors; -import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.collections4.CollectionUtils; @@ -118,7 +116,7 @@ public class Table implements Serializable { * These fields are all cached fields. The information comes from tTable. */ private List tablePartCols; - private Map> inputColumnIndexByName; + private Map> inputColnameToIndFsMap; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -607,14 +605,6 @@ public boolean equals(Object obj) { && Objects.equals(snapshotRef, other.snapshotRef); } - private List getNativePartCols() { - List partKeys = tTable.getPartitionKeys(); - if (partKeys == null) { - partKeys = new ArrayList<>(); - tTable.setPartitionKeys(partKeys); - } - return partKeys; - } /** * Returns partition columns, consulting the storage handler for non-native tables (e.g. 
Iceberg) @@ -635,7 +625,7 @@ public List getPartCols() { } tablePartCols = partCols; } else { - tablePartCols = getNativePartCols(); + tablePartCols = getPartitionKeys(); } return tablePartCols; } @@ -770,28 +760,28 @@ private boolean isField(String col) { } private void fillColumnIndexByName() { - inputColumnIndexByName = new HashMap<>(); + inputColnameToIndFsMap = new HashMap<>(); List fsList = new ArrayList<>(getColsInternal(false)); if (!isNonNative()) { - fsList.addAll(getNativePartCols()); + fsList.addAll(getPartitionKeys()); } for (int i = 0; i < fsList.size(); i++) { - inputColumnIndexByName.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); + inputColnameToIndFsMap.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); } } public int getColumnIndexByName(String colName) { - if (inputColumnIndexByName == null) { + if (inputColnameToIndFsMap == null) { fillColumnIndexByName(); } - return inputColumnIndexByName.get(colName.toLowerCase()).getLeft(); + return inputColnameToIndFsMap.get(colName.toLowerCase()).getLeft(); } public FieldSchema getFieldSchemaByName(String colName) { - if (inputColumnIndexByName == null) { + if (inputColnameToIndFsMap == null) { fillColumnIndexByName(); } - return inputColumnIndexByName.get(colName).getRight(); + return inputColnameToIndFsMap.get(colName).getRight(); } public List getStorageSchemaCols() { @@ -845,7 +835,7 @@ private List getColsInternal(boolean forMs) { public List getAllCols() { List fsList = new ArrayList<>(getColsInternal(false)); if (!isNonNative()) { - fsList.addAll(getNativePartCols()); + fsList.addAll(getPartitionKeys()); } return fsList; } @@ -998,6 +988,9 @@ public void setDbName(String databaseName) { } public List getPartitionKeys() { + if (tTable.getPartitionKeys() == null) { + tTable.setPartitionKeys(new ArrayList<>()); + } return tTable.getPartitionKeys(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 68ebf8be17fb..d759a91372c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3014,11 +3014,10 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - List partKeys = Objects.requireNonNullElse(tabMetaData.getPartitionKeys(), new ArrayList<>()); - for (FieldSchema part_col : partKeys) { - colName = part_col.getName(); + for (FieldSchema partCol : tabMetaData.getPartitionKeys()) { + colName = partCol.getName(); colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), isNullable(colName, nnc, pkc), tableAlias, true); rr.put(tableAlias, colName, colInfo); cInfoLst.add(colInfo); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index a21ce9e60cef..a96f32244f91 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -118,9 +118,8 @@ protected void analyzeRewrittenTree(ASTNode rewrittenTree, Context rewrittenCtx) */ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) throws SemanticException { String columnName = normalizeColName(colName.getText()); - List partKeys = Objects.requireNonNullElse(targetTable.getPartitionKeys(), new ArrayList<>()); // Make sure this isn't one of the partitioning columns, that's not supported. 
- for (FieldSchema fschema : partKeys) { + for (FieldSchema fschema : targetTable.getPartitionKeys()) { if (fschema.getName().equalsIgnoreCase(columnName)) { throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 98c3b4d64dd9..47072ee7770a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12040,8 +12040,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns - List partKeys = Objects.requireNonNullElse(tab.getPartitionKeys(),new ArrayList<>()); - for (FieldSchema partCol : partKeys) { + for (FieldSchema partCol : tab.getPartitionKeys()) { LOG.trace("Adding partition col: " + partCol); rwsch.put(alias, partCol.getName(), new ColumnInfo(partCol.getName(), TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), alias, true)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 366b26a296f1..ba6640c6868b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -249,10 +249,8 @@ protected void addValues(Table targetTable, String targetAlias, Map values.add( - formatter.apply(fieldSchema.getName()))); - } + targetTable.getPartitionKeys().forEach(fieldSchema -> values.add( + formatter.apply(fieldSchema.getName()))); } protected String getRhsExpValue(String newValue, String alias) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java index 1a29f8486e85..d5517b8fcfff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java @@ -100,10 +100,8 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB insertValues.add(sqlGenerator.qualify(identifier)); } - if (updateBlock.getTargetTable().getPartitionKeys() != null) { - updateBlock.getTargetTable().getPartitionKeys().forEach( - fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); - } + updateBlock.getTargetTable().getPartitionKeys().forEach( + fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); addRowLineageColumnsForUpdate(updateBlock.getTargetTable(), sqlGenerator, insertValues, conf); sqlGenerator.append(" FROM ").append(sqlGenerator.getTargetTableFullName()).append(") ");