diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 3a4b1f1f8d7d..fdad33a2583b 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -1536,10 +1536,10 @@ public List acidSelectColumns(org.apache.hadoop.hive.ql.metadata.Ta case DELETE -> // TODO: make it configurable whether we want to include the table columns in the select query. // It might make delete writes faster if we don't have to write out the row object - ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols()); + ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getStorageSchemaCols()); case UPDATE -> shouldOverwrite(table, operation) ? ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA : - ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols()); + ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getStorageSchemaCols()); case MERGE -> ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA; default -> ImmutableList.of(); }; @@ -2130,6 +2130,9 @@ public List getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm } public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (hmsTable.getMetaTable() != null) { + return false; + } if (!hmsTable.getTTable().isSetId()) { return false; } @@ -2275,6 +2278,9 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table @Override public List getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (hmsTable.getMetaTable() != null) { + return Collections.emptyList(); + } if (!hmsTable.getTTable().isSetId()) { return Collections.emptyList(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index a320148858e0..4284ce7a26e2 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -946,7 +946,8 @@ public void testCreateTableWithoutColumnComments() { @Test public void testCreatePartitionedTableWithColumnComments() { TableIdentifier identifier = TableIdentifier.of("default", "partitioned_with_comment_table"); - String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", null}; + String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", + "Transform: identity"}; shell.executeStatement("CREATE EXTERNAL TABLE partitioned_with_comment_table (" + "t_int INT COMMENT 'int column', " + "t_string STRING COMMENT 'string column', " + @@ -959,13 +960,18 @@ public void testCreatePartitionedTableWithColumnComments() { List rows = shell.executeStatement("DESCRIBE default.partitioned_with_comment_table"); List columns = icebergTable.schema().columns(); + List partitionColumns = List.of("t_string_3", "t_string_4"); // The partition transform information and partition information is 6 extra lines, and 4 more line for the columns Assert.assertEquals(columns.size() + 10, rows.size()); for (int i = 0; i < columns.size(); i++) { Types.NestedField field = columns.get(i); - Assert.assertArrayEquals(new Object[] {field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(), - field.doc() != null ? field.doc() : ""}, rows.get(i)); - Assert.assertEquals(expectedDoc[i], field.doc()); + String fieldDoc = field.doc(); + if (fieldDoc == null && partitionColumns.contains(field.name())) { + fieldDoc = "Transform: identity"; + } + Assert.assertArrayEquals(new Object[]{field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(), + fieldDoc != null ? fieldDoc : ""}, rows.get(i)); + Assert.assertEquals(expectedDoc[i], fieldDoc); } } diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out index 910e48e4214e..c9697302c550 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out @@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -453,8 +451,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -729,8 +725,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro # col_name data_type comment a int -b string -c string # Partition Information # col_name data_type comment @@ -1066,9 +1060,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc_mixed # col_name data_type comment a int -b double -c int -d string # Partition Information # col_name data_type comment @@ -1513,9 +1504,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_mixed # col_name data_type comment a int -b double -c int -d string # Partition Information # col_name data_type comment @@ -1960,9 +1948,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro_mixed # col_name data_type comment a int -b double -c int -d string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out index 55bfee6eb031..8646ebb240e7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out @@ -137,7 +137,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_orc # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -415,7 +414,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -770,7 +768,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_int # col_name data_type comment a int -b int # Partition Information # col_name data_type comment @@ -1125,7 +1122,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_parquet_double # col_name data_type comment a int -b double # Partition Information # col_name data_type comment @@ -1426,7 +1422,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_avro # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out b/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out index 8153bdd697f3..23b2d41d3dd0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/bucket_map_join_9.q.out @@ -34,21 +34,21 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized - File Output Operator [FS_53] - Map Join Operator [MAPJOIN_52] (rows=2 width=530) - BucketMapJoin:true,Conds:SEL_51._col1, _col2=RS_49._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + File Output Operator [FS_23] + Map Join Operator [MAPJOIN_22] (rows=2 width=530) + BucketMapJoin:true,Conds:SEL_21._col1, _col2=RS_19._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 2 [CUSTOM_EDGE] vectorized - MULTICAST [RS_49] + MULTICAST [RS_19] PartitionCols:_col2, _col1 - Select Operator [SEL_48] (rows=2 width=265) + Select Operator [SEL_18] (rows=2 width=265) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_47] (rows=2 width=265) + Filter Operator [FIL_17] (rows=2 width=265) predicate:(id is not null and part is not null) TableScan [TS_3] (rows=2 width=265) default@tbl,tbl2,Tbl:COMPLETE,Col:COMPLETE,Output:["foid","part","id"] - <-Select Operator [SEL_51] (rows=2 width=265) + <-Select Operator [SEL_21] (rows=2 width=265) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_50] (rows=2 width=265) + Filter Operator [FIL_20] (rows=2 width=265) predicate:(id is not null and part is not null) TableScan [TS_0] (rows=2 width=265) default@tbl,tbl,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:100,Grouping Partition Columns:["id","part"],Output:["foid","part","id"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out index 6810c72c93e8..ef5c4410fbb6 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out @@ -347,7 +347,7 @@ PREHOOK: Input: default@tbl_ice_puffin POSTHOOK: query: desc formatted tbl_ice_puffin C POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_puffin -col_name C +col_name c data_type int min 52 max 56 @@ -358,7 +358,7 @@ max_col_len num_trues num_falses bit_vector HL -comment +comment Transform: identity COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a) PREHOOK: type: QUERY diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out index 0d1700ff07a9..eba54dc24523 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out @@ -273,8 +273,6 @@ POSTHOOK: query: describe formatted tbl_ice POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice # col_name data_type comment -a int -b string c int # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out index 6e6d2da48e8c..decc037d088b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_partitioned.q.out @@ -67,12 +67,12 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[223][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[229][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[221][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[231][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[220][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[218][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[216][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[226][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -753,12 +753,12 @@ STAGE PLANS: Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[223][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[229][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[221][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[231][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[220][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[222][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[218][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[216][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[226][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 16' is a cross product PREHOOK: query: delete from tbl_ice where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -799,7 +799,7 @@ POSTHOOK: query: insert into tbl_ice_other values (10, 'ten'), (333, 'hundred') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_other -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1175,12 +1175,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -1207,7 +1219,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1251,7 +1263,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1273,13 +1285,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) - Reducer 11 <- Reducer 9 (SIMPLE_EDGE) - Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 5 (CONTAINS) @@ -1365,19 +1377,6 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reducer 10 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -1396,7 +1395,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 12 + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1412,7 +1411,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1458,6 +1457,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1604,17 +1616,6 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator keys: _col1 (type: int) minReductionHashAggr: 0.99 @@ -1643,6 +1644,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -1679,7 +1691,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[155][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: delete from tbl_ice where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out b/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out index 9ae5ea93e5f8..e75c252fcfdd 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/desc_ice_tbl_part_spec.q.out @@ -74,6 +74,8 @@ a int b string c int d string +c int Transform: identity +d string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out index fb1cdbcaf12a..b494e9f54e62 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out @@ -109,13 +109,6 @@ POSTHOOK: query: DESCRIBE FORMATTED ice_t_transform POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform # col_name data_type comment -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -182,13 +175,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_transform_prop # col_name data_type comment id int -year_field date -month_field date -day_field date -hour_field timestamp -truncate_field string -bucket_field int -identity_field int # Partition Information # col_name data_type comment @@ -255,7 +241,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_t_identity_part # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -338,7 +323,7 @@ max_col_len num_trues num_falses bit_vector -comment +comment Transform: year COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bucket_field\":\"true\",\"day_field\":\"true\",\"hour_field\":\"true\",\"identity_field\":\"true\",\"month_field\":\"true\",\"truncate_field\":\"true\",\"year_field\":\"true\"}} PREHOOK: query: DESCRIBE FORMATTED ice_t_transform_prop id PREHOOK: type: DESCTABLE diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out index 0c58a118a7ce..cd09db327734 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out @@ -806,7 +806,7 @@ Stage-3 Group By Operator [GBY_15] (rows=21 width=584) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["min(id)","max(id)","count(1)","count(id)","compute_bit_vector_hll(id)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"],keys:iceberg_truncate(id, 2) Select Operator [SEL_14] (rows=22 width=87) - Output:["id","ccy"] + Output:["ccy","id"] Please refer to the previous Select Operator [SEL_12] PREHOOK: query: insert into table tbl_target_truncate_int select a, b from tbl_src @@ -1101,7 +1101,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_year(date_time_date) Select Operator [SEL_7] (rows=1 width=240) - Output:["id","date_time_date","year_partition"] + Output:["id","year_partition","date_time_date"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_year_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1187,7 +1187,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_year(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_year_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1273,7 +1273,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_month(date_time_date) Select Operator [SEL_7] (rows=1 width=240) - Output:["id","date_time_date","year_partition"] + Output:["id","year_partition","date_time_date"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_month_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1359,7 +1359,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_month(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_month_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1445,7 +1445,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=672) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_date)","max(date_time_date)","count(date_time_date)","compute_bit_vector_hll(date_time_date)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_day(date_time_date) Select Operator [SEL_7] (rows=1 width=240) - Output:["id","date_time_date","year_partition"] + Output:["id","year_partition","date_time_date"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_day_date values (88669, '2018-05-27', 2018), (40568, '2018-02-12', 2018), (40568, '2018-07-03', 2018) @@ -1531,7 +1531,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_day(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_day_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) @@ -1617,7 +1617,7 @@ Stage-3 Group By Operator [GBY_8] (rows=1 width=640) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["max(length(id))","avg(COALESCE(length(id),0))","count(1)","count(id)","compute_bit_vector_hll(id)","min(date_time_timestamp)","max(date_time_timestamp)","count(date_time_timestamp)","compute_bit_vector_hll(date_time_timestamp)","min(year_partition)","max(year_partition)","count(year_partition)","compute_bit_vector_hll(year_partition)"],keys:year_partition, iceberg_hour(date_time_timestamp) Select Operator [SEL_7] (rows=1 width=224) - Output:["id","date_time_timestamp","year_partition"] + Output:["id","year_partition","date_time_timestamp"] Please refer to the previous Select Operator [SEL_3] PREHOOK: query: insert into tbl_hour_timestamp values (88669, '2018-05-27 11:12:00', 2018), (40568, '2018-02-12 12:45:56', 2018), (40568, '2018-07-03 06:07:56', 2018) diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out index 74d891eefc1e..430591d0d01b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_drop_column.q.out @@ -27,8 +27,8 @@ POSTHOOK: Input: default@ice_tbl col_name data_type comment strcol string intcol int -pcol string -datecol date +pcol string Transform: identity +datecol date Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out index c68cb256cd60..97989584ccf7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out @@ -479,7 +479,6 @@ POSTHOOK: Input: default@ice_parquet_int # col_name data_type comment strcol string intcol int -pcol int # Partition Information # col_name data_type comment @@ -1676,8 +1675,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -2733,8 +2730,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -3430,7 +3425,6 @@ POSTHOOK: Input: default@ice_parquet_bigint # col_name data_type comment datecol date intcol int -pcol bigint # Partition Information # col_name data_type comment @@ -4124,7 +4118,6 @@ POSTHOOK: Input: default@ice_parquet_double # col_name data_type comment datecol date intcol int -pcol double # Partition Information # col_name data_type comment @@ -4818,7 +4811,6 @@ POSTHOOK: Input: default@ice_parquet_decimal # col_name data_type comment datecol date intcol int -pcol decimal(10,6) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out index fcd1c17b24ed..f25a8b68fa3a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out @@ -610,7 +610,6 @@ POSTHOOK: Input: default@ice_parquet_date_transform_year # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -760,8 +759,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -877,8 +876,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -994,8 +993,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1117,8 +1116,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1227,8 +1226,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1305,7 +1304,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_month # col_name data_type comment bigintcol bigint -pcol date intcol int # Partition Information @@ -1456,8 +1454,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1573,8 +1571,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1690,8 +1688,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1813,8 +1811,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1923,8 +1921,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2000,7 +1998,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_day POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_day # col_name data_type comment -pcol date bigintcol bigint intcol int @@ -2152,8 +2149,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2269,8 +2266,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2378,8 +2375,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2455,7 +2452,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_truncate POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_truncate # col_name data_type comment -pcol string bigintcol bigint intcol int @@ -2591,8 +2587,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2708,8 +2704,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2817,8 +2813,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_bucket Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2894,7 +2890,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_bucket POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_bucket # col_name data_type comment -pcol string bigintcol bigint intcol int @@ -3123,7 +3118,6 @@ POSTHOOK: query: describe formatted ice_parquet_decimal_transform_bucket POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_decimal_transform_bucket # col_name data_type comment -pcol decimal(38,0) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out index de49a0d3b366..f0e686e46014 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out @@ -162,7 +162,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testice1000 # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out index 063b6389863e..2f55ccfbc41d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out @@ -261,7 +261,6 @@ POSTHOOK: Input: default@ice_parquet_int # col_name data_type comment strcol string intcol int -pcol int # Partition Information # col_name data_type comment @@ -1232,8 +1231,6 @@ POSTHOOK: Input: default@ice_parquet_string # col_name data_type comment name string age int -country string -state string # Partition Information # col_name data_type comment @@ -1715,7 +1712,6 @@ POSTHOOK: Input: default@ice_parquet_date # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -2183,7 +2179,6 @@ POSTHOOK: Input: default@ice_parquet_bigint # col_name data_type comment datecol date intcol int -pcol bigint # Partition Information # col_name data_type comment @@ -2651,7 +2646,6 @@ POSTHOOK: Input: default@ice_parquet_double # col_name data_type comment datecol date intcol int -pcol double # Partition Information # col_name data_type comment @@ -3119,7 +3113,6 @@ POSTHOOK: Input: default@ice_parquet_decimal # col_name data_type comment datecol date intcol int -pcol decimal(10,6) # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out index 12745b17f098..e1764f671867 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out @@ -606,7 +606,6 @@ POSTHOOK: Input: default@ice_parquet_date_transform_year # col_name data_type comment bigintcol bigint intcol int -pcol date # Partition Information # col_name data_type comment @@ -734,8 +733,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -851,8 +850,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -968,8 +967,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1087,8 +1086,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1197,8 +1196,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_month Select Operator - expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: int) - outputColumnNames: bigintcol, pcol, intcol + expressions: _col0 (type: bigint), _col2 (type: int), _col1 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(bigintcol), max(bigintcol), count(1), count(bigintcol), compute_bit_vector_hll(bigintcol), min(pcol), max(pcol), count(pcol), compute_bit_vector_hll(pcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1275,7 +1274,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_month # col_name data_type comment bigintcol bigint -pcol date intcol int # Partition Information @@ -1404,8 +1402,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1521,8 +1519,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1638,8 +1636,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1761,8 +1759,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1871,8 +1869,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_day Select Operator - expressions: _col0 (type: date), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: date) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(pcol), max(pcol), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -1948,7 +1946,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_day POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_day # col_name data_type comment -pcol date bigintcol bigint intcol int @@ -2100,8 +2097,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2217,8 +2214,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2326,8 +2323,8 @@ STAGE PLANS: serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe name: default.ice_parquet_date_transform_truncate Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: int) - outputColumnNames: pcol, bigintcol, intcol + expressions: _col1 (type: bigint), _col2 (type: int), _col0 (type: string) + outputColumnNames: bigintcol, intcol, pcol Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(length(pcol)), avg(COALESCE(length(pcol),0)), count(1), count(pcol), compute_bit_vector_hll(pcol), min(bigintcol), max(bigintcol), count(bigintcol), compute_bit_vector_hll(bigintcol), min(intcol), max(intcol), count(intcol), compute_bit_vector_hll(intcol) @@ -2403,7 +2400,6 @@ POSTHOOK: query: describe formatted ice_parquet_date_transform_truncate POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_parquet_date_transform_truncate # col_name data_type comment -pcol string bigintcol bigint intcol int diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out index a364be2dedf7..bba70b438c06 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out @@ -35,31 +35,31 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 2 vectorized - File Output Operator [FS_18] + File Output Operator [FS_20] table:{"name:":"default.ice_parquet"} - Select Operator [SEL_17] + Select Operator [SEL_19] Output:["_col0","_col1","_col2","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_13] + SHUFFLE [RS_15] PartitionCols:_col2 - Select Operator [SEL_12] (rows=77 width=187) + Select Operator [SEL_14] (rows=77 width=187) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=77 width=187) default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"] Reducer 3 vectorized - File Output Operator [FS_21] - Select Operator [SEL_20] (rows=38 width=187) + File Output Operator [FS_23] + Select Operator [SEL_22] (rows=38 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Group By Operator [GBY_19] (rows=38 width=187) + Group By Operator [GBY_21] (rows=38 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_16] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=77 width=187) + Group By Operator [GBY_17] (rows=77 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"],keys:pcol - Select Operator [SEL_14] (rows=77 width=187) + Select Operator [SEL_16] (rows=77 width=187) Output:["strcol","intcol","pcol"] - Please refer to the previous Select Operator [SEL_12] + Please refer to the previous Select Operator [SEL_14] PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE `ice_parquet` PREHOOK: type: QUERY @@ -90,31 +90,31 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 2 vectorized - File Output Operator [FS_18] + File Output Operator [FS_20] table:{"name:":"default.ice_parquet"} - Select Operator [SEL_17] + Select Operator [SEL_19] Output:["_col0","_col1","_col2","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_13] + SHUFFLE [RS_15] PartitionCols:_col2 - Select Operator [SEL_12] (rows=77/6 width=187) + Select Operator [SEL_14] (rows=77/6 width=187) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=77/6 width=187) default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"] Reducer 3 vectorized - File Output Operator [FS_21] - Select Operator [SEL_20] (rows=38/3 width=187) + File Output Operator [FS_23] + Select Operator [SEL_22] (rows=38/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Group By Operator [GBY_19] (rows=38/3 width=187) + Group By Operator [GBY_21] (rows=38/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_16] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=77/3 width=187) + Group By Operator [GBY_17] (rows=77/3 width=187) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"],keys:pcol - Select Operator [SEL_14] (rows=77/6 width=187) + Select Operator [SEL_16] (rows=77/6 width=187) Output:["strcol","intcol","pcol"] - Please refer to the previous Select Operator [SEL_12] + Please refer to the previous Select Operator [SEL_14] PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet PREHOOK: type: QUERY diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out index 5036ca420f88..1543f272a9a9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out @@ -89,8 +89,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -172,8 +170,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -263,8 +259,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -352,8 +346,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -427,8 +419,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index f8dfb22e5fa1..530185c19150 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -113,7 +113,7 @@ Stage-0 File Output Operator [FS_10] Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_25] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -177,7 +177,7 @@ Stage-0 File Output Operator [FS_14] Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_39] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] @@ -247,7 +247,7 @@ Stage-0 File Output Operator [FS_10] Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=3 width=336) + Map Join Operator [MAPJOIN_25] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -311,7 +311,7 @@ Stage-0 File Output Operator [FS_14] Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Map Join Operator [MAPJOIN_39] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] @@ -378,23 +378,23 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) - BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_32] (rows=3 width=336) + BucketMapJoin:true,Conds:SEL_31._col0, _col1=RS_29._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_49] + MULTICAST [RS_29] PartitionCols:_col1 - Select Operator [SEL_48] (rows=3 width=168) + Select Operator [SEL_28] (rows=3 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=3 width=168) + Filter Operator [FIL_27] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_51] (rows=10 width=168) + <-Select Operator [SEL_31] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=10 width=260) + Filter Operator [FIL_30] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:7,Grouping Partition Columns:["decimal_col"],Output:["date_col","string_col","decimal_col"] @@ -442,30 +442,30 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + File Output Operator [FS_51] + Select Operator [SEL_50] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) - BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_49] (rows=3 width=336) + BucketMapJoin:true,Conds:SEL_48._col0, _col1=RS_46._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_56] + MULTICAST [RS_46] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_45] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_54] + SHUFFLE [RS_44] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_43] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col - Select Operator [SEL_52] (rows=3 width=168) + Select Operator [SEL_42] (rows=3 width=168) Output:["date_col","decimal_col"] - Filter Operator [FIL_51] (rows=3 width=168) + Filter Operator [FIL_41] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_58] (rows=10 width=168) + <-Select Operator [SEL_48] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_57] (rows=10 width=260) + Filter Operator [FIL_47] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:7,Grouping Partition Columns:["decimal_col"],Output:["date_col","string_col","decimal_col"] @@ -512,23 +512,23 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_54] - Select Operator [SEL_53] (rows=3 width=520) + File Output Operator [FS_34] + Select Operator [SEL_33] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=3 width=336) - Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_32] (rows=3 width=336) + Conds:SEL_31._col0, _col1=RS_29._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_49] + BROADCAST [RS_29] PartitionCols:_col0, _col1 - Select Operator [SEL_48] (rows=3 width=168) + Select Operator [SEL_28] (rows=3 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=3 width=168) + Filter Operator [FIL_27] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_51] (rows=10 width=168) + <-Select Operator [SEL_31] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=10 width=260) + Filter Operator [FIL_30] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","string_col","decimal_col"] @@ -576,30 +576,30 @@ Stage-0 limit:-1 Stage-1 Map 1 vectorized, llap - File Output Operator [FS_61] - Select Operator [SEL_60] (rows=3 width=520) + File Output Operator [FS_51] + Select Operator [SEL_50] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=3 width=336) - Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_49] (rows=3 width=336) + Conds:SEL_48._col0, _col1=RS_46._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_56] + BROADCAST [RS_46] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=3 width=168) + Group By Operator [GBY_45] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_54] + SHUFFLE [RS_44] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=3 width=168) + Group By Operator [GBY_43] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col - Select Operator [SEL_52] (rows=3 width=168) + Select Operator [SEL_42] (rows=3 width=168) Output:["date_col","decimal_col"] - Filter Operator [FIL_51] (rows=3 width=168) + Filter Operator [FIL_41] (rows=3 width=168) predicate:(if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_3] (rows=7 width=168) default@source_table,source_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","decimal_col"] - <-Select Operator [SEL_58] (rows=10 width=168) + <-Select Operator [SEL_48] (rows=10 width=168) Output:["_col0","_col1"] - Filter Operator [FIL_57] (rows=10 width=260) + Filter Operator [FIL_47] (rows=10 width=260) predicate:((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) TableScan [TS_0] (rows=20 width=260) default@target_table,target_table,Tbl:COMPLETE,Col:COMPLETE,Output:["date_col","string_col","decimal_col"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out index 413fd81aa953..7ce866f2b03c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_2.q.out @@ -251,39 +251,39 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result"} - Select Operator [SEL_41] (rows=785 width=366) + Select Operator [SEL_31] (rows=785 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=785 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=785 width=186) + BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n0,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=1000 width=95) + <-Select Operator [SEL_29] (rows=1000 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=1000 width=95) + Filter Operator [FIL_28] (rows=1000 width=95) predicate:key is not null TableScan [TS_3] (rows=1000 width=95) default@srcbucket_mapjoin_part_n0,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=785 width=366) + Select Operator [SEL_33] (rows=785 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -415,39 +415,39 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result"} - Select Operator [SEL_41] (rows=809 width=366) + Select Operator [SEL_31] (rows=809 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=809 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=809 width=186) + BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n0,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=524 width=95) + <-Select Operator [SEL_29] (rows=524 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=524 width=95) + Filter Operator [FIL_28] (rows=524 width=95) predicate:key is not null TableScan [TS_3] (rows=524 width=95) default@srcbucket_mapjoin_part_2,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=809 width=366) + Select Operator [SEL_33] (rows=809 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out index bd82280c9c07..f21a3c84da24 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_3.q.out @@ -122,27 +122,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_22] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_21] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n4,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_25] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_24] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1_n1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] @@ -196,27 +196,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_35] + Group By Operator [GBY_34] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_31] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_27] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_26] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n4,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_30] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_29] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1_n1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out index 4ddd445ecbfb..dd93fcf31851 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_4.q.out @@ -235,27 +235,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=1797 width=8) - Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=1797 width=8) + Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=738 width=4) + Select Operator [SEL_22] (rows=738 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=738 width=89) + Filter Operator [FIL_21] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_3] (rows=738 width=89) default@srcbucket_mapjoin_part_2_n6,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=738 width=4) + <-Select Operator [SEL_25] (rows=738 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=738 width=89) + Filter Operator [FIL_24] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_0] (rows=738 width=89) default@srcbucket_mapjoin_part_1_n2,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] @@ -302,33 +302,33 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_49] - Group By Operator [GBY_48] (rows=1 width=8) + File Output Operator [FS_39] + Group By Operator [GBY_38] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_46] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_37] + Group By Operator [GBY_36] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_45] (rows=1797 width=8) - Conds:SEL_44._col0, _col1=RS_39._col0, _col1(Inner) + Map Join Operator [MAPJOIN_35] (rows=1797 width=8) + Conds:SEL_34._col0, _col1=RS_29._col0, _col1(Inner) <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_39] + BROADCAST [RS_29] PartitionCols:_col0, _col1 - Select Operator [SEL_38] (rows=738 width=89) + Select Operator [SEL_28] (rows=738 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_37] (rows=738 width=89) + Filter Operator [FIL_27] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_3] (rows=738 width=89) default@srcbucket_mapjoin_part_2_n6,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - Dynamic Partitioning Event Operator [EVENT_42] (rows=2 width=85) - Group By Operator [GBY_41] (rows=2 width=85) + Dynamic Partitioning Event Operator [EVENT_32] (rows=2 width=85) + Group By Operator [GBY_31] (rows=2 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_40] (rows=738 width=85) + Select Operator [SEL_30] (rows=738 width=85) Output:["_col0"] - Please refer to the previous Select Operator [SEL_38] - <-Select Operator [SEL_44] (rows=738 width=89) + Please refer to the previous Select Operator [SEL_28] + <-Select Operator [SEL_34] (rows=738 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=738 width=89) + Filter Operator [FIL_33] (rows=738 width=89) predicate:(part is not null and key is not null) TableScan [TS_0] (rows=738 width=89) default@srcbucket_mapjoin_part_1_n2,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out index 4b92f571e020..8ff5e300aa46 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_5.q.out @@ -174,27 +174,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_35] + Group By Operator [GBY_34] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_31] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_27] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_26] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_2_n0,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_30] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_29] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] @@ -241,27 +241,27 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_40] - Group By Operator [GBY_39] (rows=1 width=8) + File Output Operator [FS_30] + Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_38] - Group By Operator [GBY_37] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_36] (rows=372 width=8) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner) + Map Join Operator [MAPJOIN_26] (rows=372 width=8) + BucketMapJoin:true,Conds:SEL_25._col0=RS_23._col0(Inner) <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_23] PartitionCols:_col0 - Select Operator [SEL_32] (rows=238 width=4) + Select Operator [SEL_22] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_31] (rows=238 width=89) + Filter Operator [FIL_21] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_3] (rows=238 width=89) default@srcbucket_mapjoin_part_3,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","part"] - <-Select Operator [SEL_35] (rows=238 width=4) + <-Select Operator [SEL_25] (rows=238 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=238 width=89) + Filter Operator [FIL_24] (rows=238 width=89) predicate:((part = '1') and key is not null) TableScan [TS_0] (rows=238 width=89) default@srcbucket_mapjoin_part_1,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","part"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out index 11477c98ecc5..024fde7f1576 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_6.q.out @@ -141,37 +141,37 @@ Stage-3 Dependency Collection{} Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_48] - Select Operator [SEL_47] (rows=1 width=798) + File Output Operator [FS_38] + Select Operator [SEL_37] (rows=1 width=798) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_46] (rows=1 width=500) + Group By Operator [GBY_36] (rows=1 width=500) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","max(VALUE._col9)","avg(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] vectorized, llap - File Output Operator [FS_42] + File Output Operator [FS_32] table:{"name:":"default.bucketmapjoin_tmp_result_n3"} - Select Operator [SEL_41] (rows=809 width=366) + Select Operator [SEL_31] (rows=809 width=366) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_40] (rows=809 width=186) - BucketMapJoin:true,Conds:RS_37._col0=SEL_39._col0(Inner),Output:["_col0","_col1","_col3"] + Map Join Operator [MAPJOIN_30] (rows=809 width=186) + BucketMapJoin:true,Conds:RS_27._col0=SEL_29._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_37] + MULTICAST [RS_27] PartitionCols:_col0 - Select Operator [SEL_36] (rows=238 width=95) + Select Operator [SEL_26] (rows=238 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_35] (rows=238 width=95) + Filter Operator [FIL_25] (rows=238 width=95) predicate:key is not null TableScan [TS_0] (rows=238 width=95) default@srcbucket_mapjoin_n5,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_39] (rows=524 width=95) + <-Select Operator [SEL_29] (rows=524 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=524 width=95) + Filter Operator [FIL_28] (rows=524 width=95) predicate:key is not null TableScan [TS_3] (rows=524 width=95) default@srcbucket_mapjoin_part_2_n7,b,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:2,Grouping Partition Columns:["key"],Output:["key","value"] - PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_44] (rows=1 width=704) + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=704) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(key))","avg(COALESCE(length(key),0))","count(1)","count(key)","compute_bit_vector_hll(key)","max(length(value1))","avg(COALESCE(length(value1),0))","count(value1)","compute_bit_vector_hll(value1)","max(length(value2))","avg(COALESCE(length(value2),0))","count(value2)","compute_bit_vector_hll(value2)"] - Select Operator [SEL_43] (rows=809 width=366) + Select Operator [SEL_33] (rows=809 width=366) Output:["key","value1","value2"] - Please refer to the previous Select Operator [SEL_41] + Please refer to the previous Select Operator [SEL_31] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out index e32e34094e80..5c66fb5d0924 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out @@ -49,29 +49,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_61] - Limit [LIM_60] (rows=20 width=447) + File Output Operator [FS_46] + Limit [LIM_45] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=791 width=447) + Select Operator [SEL_44] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=791 width=447) + SHUFFLE [RS_43] + Top N Key Operator [TNK_42] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_41] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_40._col0, _col1=RS_38._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_53] + MULTICAST [RS_38] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=500 width=178) + Select Operator [SEL_37] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=500 width=178) + Filter Operator [FIL_36] (rows=500 width=178) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=500 width=269) + <-Select Operator [SEL_40] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=500 width=269) + Filter Operator [FIL_39] (rows=500 width=269) predicate:(key1 is not null and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -147,29 +147,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_61] - Limit [LIM_60] (rows=20 width=447) + File Output Operator [FS_46] + Limit [LIM_45] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=473 width=447) + Select Operator [SEL_44] (rows=473 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=473 width=447) + SHUFFLE [RS_43] + Top N Key Operator [TNK_42] (rows=473 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=473 width=447) - BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_41] (rows=473 width=447) + BucketMapJoin:true,Conds:SEL_40._col0, _col1=RS_38._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_53] + MULTICAST [RS_38] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=387 width=178) + Select Operator [SEL_37] (rows=387 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=387 width=178) + Filter Operator [FIL_36] (rows=387 width=178) predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=387 width=269) + <-Select Operator [SEL_40] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=387 width=269) + Filter Operator [FIL_39] (rows=387 width=269) predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -245,29 +245,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=791 width=447) + Select Operator [SEL_34] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=791 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=500 width=178) + Select Operator [SEL_27] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500 width=178) + Filter Operator [FIL_26] (rows=500 width=178) predicate:key is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=500 width=269) + <-Select Operator [SEL_30] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=500 width=269) + Filter Operator [FIL_29] (rows=500 width=269) predicate:key1 is not null TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -343,29 +343,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=473 width=447) + Select Operator [SEL_34] (rows=473 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=473 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=473 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=473 width=447) - BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=473 width=447) + BucketMapJoin:true,Conds:SEL_30._col0=RS_28._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_33] + MULTICAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=178) + Select Operator [SEL_27] (rows=387 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=387 width=178) + Filter Operator [FIL_26] (rows=387 width=178) predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100'))) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=387 width=269) + <-Select Operator [SEL_30] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=387 width=269) + Filter Operator [FIL_29] (rows=387 width=269) predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100'))) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -443,29 +443,29 @@ Stage-0 limit:20 Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=612 width=447) + Select Operator [SEL_34] (rows=612 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=612 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=612 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=612 width=447) - Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=612 width=447) + Conds:RS_28._col0=SEL_30._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_28] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=269) + Select Operator [SEL_27] (rows=387 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=387 width=269) + Filter Operator [FIL_26] (rows=387 width=269) predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_35] (rows=500 width=178) + <-Select Operator [SEL_30] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=500 width=178) + Filter Operator [FIL_29] (rows=500 width=178) predicate:key is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -541,29 +541,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_41] - Limit [LIM_40] (rows=20 width=447) + File Output Operator [FS_36] + Limit [LIM_35] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=814 width=447) + Select Operator [SEL_34] (rows=814 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=814 width=447) + SHUFFLE [RS_33] + Top N Key Operator [TNK_32] (rows=814 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=814 width=447) - Conds:SEL_35._col1=RS_33._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_31] (rows=814 width=447) + Conds:SEL_30._col1=RS_28._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + BROADCAST [RS_28] PartitionCols:_col1 - Select Operator [SEL_32] (rows=500 width=178) + Select Operator [SEL_27] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=500 width=178) + Filter Operator [FIL_26] (rows=500 width=178) predicate:value is not null TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=500 width=269) + <-Select Operator [SEL_30] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=500 width=269) + Filter Operator [FIL_29] (rows=500 width=269) predicate:key2 is not null TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] @@ -637,29 +637,29 @@ Stage-0 limit:20 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_66] - Limit [LIM_65] (rows=20 width=447) + File Output Operator [FS_51] + Limit [LIM_50] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_64] (rows=791 width=447) + Select Operator [SEL_49] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_63] - Top N Key Operator [TNK_62] (rows=791 width=447) + SHUFFLE [RS_48] + Top N Key Operator [TNK_47] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_61] (rows=791 width=447) - BucketMapJoin:true,Conds:SEL_60._col0, _col1, _col2=RS_58._col0, _col1, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + Map Join Operator [MAPJOIN_46] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_45._col0, _col1, _col2=RS_43._col0, _col1, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_58] + MULTICAST [RS_43] PartitionCols:_col0, _col1 - Select Operator [SEL_57] (rows=500 width=178) + Select Operator [SEL_42] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_56] (rows=500 width=178) + Filter Operator [FIL_41] (rows=500 width=178) predicate:(key is not null and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_60] (rows=500 width=269) + <-Select Operator [SEL_45] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_59] (rows=500 width=269) + Filter Operator [FIL_44] (rows=500 width=269) predicate:(key1 is not null and key2 is not null and value is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out index e7a677f06282..148f2d89ee20 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_8.q.out @@ -147,25 +147,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=16 width=8) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=16 width=8) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_35] - Map Join Operator [MAPJOIN_34] (rows=16 width=8) - BucketMapJoin:true,Conds:SEL_33._col0=RS_31._col0(Inner),Output:["_col0","_col1"] + SHUFFLE [RS_30] + Map Join Operator [MAPJOIN_29] (rows=16 width=8) + BucketMapJoin:true,Conds:SEL_28._col0=RS_26._col0(Inner),Output:["_col0","_col1"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_31] + MULTICAST [RS_26] PartitionCols:_col0 - Select Operator [SEL_30] (rows=4 width=4) + Select Operator [SEL_25] (rows=4 width=4) Output:["_col0"] - Filter Operator [FIL_29] (rows=4 width=4) + Filter Operator [FIL_24] (rows=4 width=4) predicate:key1 is not null TableScan [TS_3] (rows=6 width=3) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1"] - <-Select Operator [SEL_33] (rows=16 width=8) + <-Select Operator [SEL_28] (rows=16 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=16 width=8) + Filter Operator [FIL_27] (rows=16 width=8) predicate:key1 is not null TableScan [TS_0] (rows=16 width=8) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","id"] @@ -231,25 +231,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_37] - Select Operator [SEL_36] (rows=16 width=92) + File Output Operator [FS_32] + Select Operator [SEL_31] (rows=16 width=92) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_35] - Map Join Operator [MAPJOIN_34] (rows=16 width=92) - BucketMapJoin:true,Conds:SEL_33._col0=RS_31._col0(Inner),Output:["_col0","_col1"] + SHUFFLE [RS_30] + Map Join Operator [MAPJOIN_29] (rows=16 width=92) + BucketMapJoin:true,Conds:SEL_28._col0=RS_26._col0(Inner),Output:["_col0","_col1"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_31] + MULTICAST [RS_26] PartitionCols:_col0 - Select Operator [SEL_30] (rows=4 width=87) + Select Operator [SEL_25] (rows=4 width=87) Output:["_col0"] - Filter Operator [FIL_29] (rows=4 width=87) + Filter Operator [FIL_24] (rows=4 width=87) predicate:key2 is not null TableScan [TS_3] (rows=6 width=72) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key2"] - <-Select Operator [SEL_33] (rows=16 width=92) + <-Select Operator [SEL_28] (rows=16 width=92) Output:["_col0","_col1"] - Filter Operator [FIL_32] (rows=16 width=92) + Filter Operator [FIL_27] (rows=16 width=92) predicate:key2 is not null TableScan [TS_0] (rows=16 width=92) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key2"],Output:["key2","id"] @@ -399,25 +399,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_57] - Select Operator [SEL_56] (rows=12 width=96) + File Output Operator [FS_42] + Select Operator [SEL_41] (rows=12 width=96) Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - Map Join Operator [MAPJOIN_54] (rows=12 width=96) - BucketMapJoin:true,Conds:SEL_53._col0, _col1=RS_51._col0, _col1(Inner),Output:["_col0","_col1","_col2"] + SHUFFLE [RS_40] + Map Join Operator [MAPJOIN_39] (rows=12 width=96) + BucketMapJoin:true,Conds:SEL_38._col0, _col1=RS_36._col0, _col1(Inner),Output:["_col0","_col1","_col2"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_51] + MULTICAST [RS_36] PartitionCols:_col0, _col1 - Select Operator [SEL_50] (rows=3 width=91) + Select Operator [SEL_35] (rows=3 width=91) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=3 width=91) + Filter Operator [FIL_34] (rows=3 width=91) predicate:(key1 is not null and key2 is not null) TableScan [TS_3] (rows=6 width=75) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2"] - <-Select Operator [SEL_53] (rows=12 width=96) + <-Select Operator [SEL_38] (rows=12 width=96) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_52] (rows=12 width=96) + Filter Operator [FIL_37] (rows=12 width=96) predicate:(key1 is not null and key2 is not null) TableScan [TS_0] (rows=12 width=96) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:32,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","id"] @@ -479,25 +479,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_57] - Select Operator [SEL_56] (rows=9 width=78) + File Output Operator [FS_47] + Select Operator [SEL_46] (rows=9 width=78) Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - Map Join Operator [MAPJOIN_54] (rows=9 width=78) - BucketMapJoin:true,Conds:SEL_53._col0, _col1=RS_51._col0, _col1(Inner),Output:["_col0","_col1","_col2"] + SHUFFLE [RS_45] + Map Join Operator [MAPJOIN_44] (rows=9 width=78) + BucketMapJoin:true,Conds:SEL_43._col0, _col1=RS_41._col0, _col1(Inner),Output:["_col0","_col1","_col2"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_51] + MULTICAST [RS_41] PartitionCols:_col0 - Select Operator [SEL_50] (rows=3 width=93) + Select Operator [SEL_40] (rows=3 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=3 width=93) + Filter Operator [FIL_39] (rows=3 width=93) predicate:(key1 is not null and value is not null) TableScan [TS_3] (rows=6 width=77) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","value"] - <-Select Operator [SEL_53] (rows=12 width=83) + <-Select Operator [SEL_43] (rows=12 width=83) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_52] (rows=12 width=83) + Filter Operator [FIL_42] (rows=12 width=83) predicate:(value is not null and key1 is not null) TableScan [TS_0] (rows=16 width=81) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","value","id"] @@ -559,25 +559,25 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized, llap - File Output Operator [FS_67] - Select Operator [SEL_66] (rows=5 width=150) + File Output Operator [FS_52] + Select Operator [SEL_51] (rows=5 width=150) Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_65] - Map Join Operator [MAPJOIN_64] (rows=5 width=150) - BucketMapJoin:true,Conds:SEL_63._col0, _col1, _col2=RS_61._col0, _col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3"] + SHUFFLE [RS_50] + Map Join Operator [MAPJOIN_49] (rows=5 width=150) + BucketMapJoin:true,Conds:SEL_48._col0, _col1, _col2=RS_46._col0, _col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 3 [CUSTOM_EDGE] vectorized, llap - MULTICAST [RS_61] + MULTICAST [RS_46] PartitionCols:_col0, _col1 - Select Operator [SEL_60] (rows=2 width=180) + Select Operator [SEL_45] (rows=2 width=180) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_59] (rows=2 width=180) + Filter Operator [FIL_44] (rows=2 width=180) predicate:(key1 is not null and key2 is not null and value is not null) TableScan [TS_3] (rows=6 width=150) default@src_small,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_63] (rows=8 width=164) + <-Select Operator [SEL_48] (rows=8 width=164) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_62] (rows=8 width=164) + Filter Operator [FIL_47] (rows=8 width=164) predicate:(value is not null and key1 is not null and key2 is not null) TableScan [TS_0] (rows=12 width=164) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:32,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value","id"] diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out index 42f0631140fc..6308cdcb5ff5 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out @@ -640,7 +640,6 @@ POSTHOOK: Input: default@zorder_tsdl_test # col_name data_type comment ts timestamp dd double -ll int # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out index 981b8269ad51..115ad8903ffa 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out @@ -269,9 +269,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment @@ -866,9 +864,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out index 3af63c58b6c2..7235f28d6c0b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out @@ -138,7 +138,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -225,7 +224,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out index 924c6d9953fa..7e317557d953 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out @@ -130,7 +130,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint company_id bigint @@ -280,7 +279,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint company_id bigint diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out index 61a931d74a33..296194cb8912 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out @@ -225,8 +225,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment event_id int -event_time timestamp with local time zone -event_src string # Partition Information # col_name data_type comment @@ -336,8 +334,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment event_id int -event_time timestamp with local time zone -event_src string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out index bc786c1e8dc8..2a9c688f714d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out @@ -176,9 +176,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment @@ -289,9 +287,7 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint team_id bigint -company_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out index 5508bdca124c..33407f123d97 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out @@ -173,7 +173,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -276,7 +275,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -514,7 +512,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -621,7 +618,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out index 440f6334f114..2a1bfe817775 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out @@ -208,7 +208,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment fname string last_name string -dept_id bigint address string # Partition Information @@ -312,7 +311,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment fname string last_name string -dept_id bigint address string # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out index 4e120fb8c50d..90f23c830e76 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out @@ -177,9 +177,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment @@ -289,9 +286,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment @@ -408,9 +402,6 @@ POSTHOOK: Input: default@ice_orc_wo_evo # col_name data_type comment first_name string last_name string -dept_id bigint -city string -registration_date date # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out index 1d1143f4b635..5e24153c32a4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out @@ -158,9 +158,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -275,9 +272,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -392,9 +386,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -508,9 +499,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment @@ -624,9 +612,6 @@ POSTHOOK: Input: default@ice_orc # col_name data_type comment first_name string last_name string -registration_date date -dept_id bigint -city string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out index b01185bb6911..1341c5356ef8 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out @@ -106,7 +106,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -200,7 +199,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -308,7 +306,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information @@ -416,7 +413,6 @@ POSTHOOK: query: describe formatted ice_orc POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_orc # col_name data_type comment -a string b bigint # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out index fa5dcd05e101..8937967b79a7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out @@ -65,7 +65,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket_big # col_name data_type comment id string -key int value string # Partition Information @@ -183,7 +182,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@srcbucket_big # col_name data_type comment id string -key int value string # Partition Information diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out index 62070278505d..0d5d5d70771f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out @@ -94,7 +94,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -177,7 +176,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment @@ -286,7 +284,6 @@ POSTHOOK: Input: ice_comp@ice_orc # col_name data_type comment first_name string last_name string -dept_id bigint # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out index b4d8dcd592d5..9659606b1045 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_row_lineage_compactions.q.out @@ -135,7 +135,7 @@ POSTHOOK: query: alter table part_tbl set tblproperties ('compactor.threshold.ta POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: ice_comp_all@part_tbl POSTHOOK: Output: ice_comp_all@part_tbl -Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: merge into part_tbl t using (select 1 as id, 'p1_upd' as data, 10 as dept_id) s on t.dept_id = s.dept_id and t.id = s.id diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out index 5f31e752db3f..afe15f60d91c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out @@ -876,10 +876,10 @@ PREHOOK: Input: default@tbl_ice_mixed_parted POSTHOOK: query: describe tbl_ice_mixed_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_mixed_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out index f1a017215639..c215c4005a46 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_orc.q.out @@ -605,10 +605,10 @@ PREHOOK: Input: default@tbl_ice_orc_parted POSTHOOK: query: describe tbl_ice_orc_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_orc_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out index 2feda580b67a..38a56ad9bdb7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out @@ -508,10 +508,10 @@ PREHOOK: Input: default@tbl_ice_parquet_parted POSTHOOK: query: describe tbl_ice_parquet_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_parquet_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out index 7ea7605467ca..5d870e2466f1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out @@ -48,7 +48,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat1 # col_name data_type comment c int -b string # Partition Information # col_name data_type comment @@ -140,7 +139,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat2 # col_name data_type comment c int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out index 770cc967d0bc..45f5be172bd3 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out @@ -47,8 +47,6 @@ POSTHOOK: query: describe formatted mat1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat1 # col_name data_type comment -b string -c int # Partition Information # col_name data_type comment @@ -141,8 +139,6 @@ POSTHOOK: query: describe formatted mat2 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@mat2 # col_name data_type comment -b string -c int # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out index a44d9394025b..3b9b57aa94bf 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out @@ -82,8 +82,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment @@ -174,8 +172,6 @@ orderid int quantity int itemid int tradets timestamp -p1 string -p2 string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out index 24fff78df880..c3c309b656aa 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out @@ -37,11 +37,6 @@ equality_delete_record_count bigint Count of records in equality d equality_delete_file_count int Count of equality delete files last_updated_at timestamp with local time zone Commit time of snapshot that last updated this partition last_updated_snapshot_id bigint Id of snapshot that last updated this partition - -# Partition Information -# col_name data_type comment -d_part int Transform: identity -e_part int Transform: identity PREHOOK: query: select * from default.ice1.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice1 diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out index be765c27120a..61ddfbc3baa2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out @@ -79,7 +79,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_truncate # col_name data_type comment a int -b string # Partition Information # col_name data_type comment @@ -187,7 +186,6 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_truncate # col_name data_type comment a int -b string # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out index 5d4e328faf21..750ea7e0ba6d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out @@ -287,14 +287,14 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[293][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[295][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1061,14 +1061,14 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[293][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product -Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[295][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1116,7 +1116,7 @@ POSTHOOK: query: insert into tbl_ice_other values (10, 'ten'), (333, 'hundred') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_other -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain update tbl_ice set b='Changed forever' where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1138,21 +1138,41 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 3 <- Reducer 14 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- Union 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 2 (SIMPLE_EDGE) + Reducer 7 <- Map 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 10 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 9 <- Reducer 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: t2 + filterExpr: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: tbl_ice @@ -1221,97 +1241,20 @@ STAGE PLANS: Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: t2 - filterExpr: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 12 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 11 Reduce Operator Tree: Merge Join Operator condition map: @@ -1327,7 +1270,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1373,20 +1316,182 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) Reducer 14 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (row_number_window_0 = 1) (type: boolean) + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'Changed forever' (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat + output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat + serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe + name: default.tbl_ice + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 2 + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + Reducer 7 Reduce Operator Tree: Merge Join Operator condition map: @@ -1401,7 +1506,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: string), _col6 (type: bigint), _col7 (type: string), _col9 (type: boolean) - Reducer 3 + Reducer 8 Reduce Operator Tree: Merge Join Operator condition map: @@ -1429,7 +1534,7 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: string) Statistics: Num rows: 9 Data size: 2053 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: bigint), _col7 (type: string) - Reducer 4 + Reducer 9 Reduce Operator Tree: Merge Join Operator condition map: @@ -1450,101 +1555,8 @@ STAGE PLANS: Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: string), VALUE._col7 (type: int), KEY.iceberg_bucket(_col5, 16) (type: int), KEY.iceberg_truncate(_col6, 3) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, iceberg_bucket(_col5, 16), iceberg_truncate(_col6, 3) - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat - output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat - serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe - name: default.tbl_ice - Reducer 7 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint), _col6 (type: string), _col0 (type: int), 'Changed forever' (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Reducer 8 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col6 (type: string) - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col5 - raw input shape: - window functions: - window function definition - alias: row_number_window_0 - name: row_number - window function: GenericUDAFRowNumberEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 9 Data size: 1900 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (row_number_window_0 = 1) (type: boolean) - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: int), _col4 (type: bigint), _col5 (type: string), -1L (type: bigint), _col6 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 4 Data size: 844 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: iceberg_bucket(_col5, 16) (type: int), iceberg_truncate(_col6, 3) (type: string) - Statistics: Num rows: 22 Data size: 3181 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int) - Union 5 - Vertex: Union 5 + Union 4 + Vertex: Union 4 Stage: Stage-2 Dependency Collection @@ -1563,7 +1575,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[217][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: update tbl_ice set b='Changed forever' where a in (select t1.a from tbl_ice t1 join tbl_ice_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -1615,7 +1627,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_standard_other POSTHOOK: Lineage: tbl_standard_other.a SCRIPT [] POSTHOOK: Lineage: tbl_standard_other.b SCRIPT [] -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -2074,7 +2086,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[224][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: update tbl_ice set b='The last one' where a in (select t1.a from tbl_ice t1 join tbl_standard_other t2 on t1.a = t2.a) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out index 1f837c4b586e..c9938d7a2236 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_avro.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out index 84de34cb28b8..e7a6c6090bcf 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_orc.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out index ecc74a61c6c2..4d8f0d994b14 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_partitioned_parquet.q.out @@ -53,8 +53,8 @@ POSTHOOK: query: insert into tbl_ice values (444, 'hola', 800), (555, 'schola', POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice -Warning: Shuffle Join MERGEJOIN[66][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: update tbl_ice set b='Changed again' where a in (select a from tbl_ice where a <= 5) or c in (select c from tbl_ice where c > 800) PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out index 31676dd56a8c..6b7d2ee05aa4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_merge_mixed.q.out @@ -899,66 +899,66 @@ Stage-6 Dependency Collection{} Stage-4 Reducer 2 vectorized - File Output Operator [FS_64] + File Output Operator [FS_60] table:{"name:":"default.store_sales"} - Select Operator [SEL_63] + Select Operator [SEL_59] Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col3","iceberg_bucket(_col2, 3)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_59] + SHUFFLE [RS_55] PartitionCols:_col3, iceberg_bucket(_col2, 3) - Select Operator [SEL_55] (rows=1 width=#Masked#) + Select Operator [SEL_51] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - Filter Operator [FIL_51] (rows=1 width=#Masked#) + Filter Operator [FIL_47] (rows=1 width=#Masked#) predicate:((_col24 = _col34) and (_col47 = _col37) and (_col36 = 2451181) and (floor((_col34 / 1000)) * 1000) BETWEEN 1000 AND 2000 and (_col30 < 0) and _col33 is null) - Select Operator [SEL_49] (rows=5 width=#Masked#) + Select Operator [SEL_45] (rows=5 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49"] - Map Join Operator [MAPJOIN_48] (rows=5 width=#Masked#) - BucketMapJoin:true,Conds:SEL_47._col2, _col1=RS_46._col8, _col7(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51"] + Map Join Operator [MAPJOIN_44] (rows=5 width=#Masked#) + BucketMapJoin:true,Conds:SEL_43._col2, _col1=RS_42._col8, _col7(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51"] <-Map 5 [CUSTOM_EDGE] vectorized - MULTICAST [RS_46] + MULTICAST [RS_42] PartitionCols:_col7 - Select Operator [SEL_45] (rows=2 width=#Masked#) + Select Operator [SEL_41] (rows=2 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27"] - Filter Operator [FIL_44] (rows=2 width=#Masked#) + Filter Operator [FIL_40] (rows=2 width=#Masked#) predicate:((ss_sold_date_sk = 2451181) and ss_item_sk is not null and ss_customer_sk is not null) TableScan [TS_2] (rows=2 width=#Masked#) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_list_price","ss_sales_price","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price","ss_ext_tax","ss_coupon_amt","ss_net_paid","ss_net_paid_inc_tax","ss_net_profit"] - <-Select Operator [SEL_47] (rows=5 width=#Masked#) + <-Select Operator [SEL_43] (rows=5 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] TableScan [TS_0] (rows=5 width=#Masked#) default@ssv,s,Tbl:COMPLETE,Col:NONE,Grouping Num Buckets:3,Grouping Partition Columns:["ss_item_sk2"],Output:["ss_sold_time_sk","ss_item_sk2","ss_customer_sk2","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_list_price","ss_sales_price","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price","ss_ext_tax","ss_coupon_amt","ss_net_paid","ss_net_paid_inc_tax","ss_net_profit"] Reducer 3 vectorized - File Output Operator [FS_66] + File Output Operator [FS_62] table:{"name:":"default.store_sales"} - Select Operator [SEL_65] + Select Operator [SEL_61] Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col3","iceberg_bucket(_col2, 3)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_60] + SHUFFLE [RS_56] PartitionCols:_col3, iceberg_bucket(_col2, 3) - Select Operator [SEL_56] (rows=1 width=#Masked#) + Select Operator [SEL_52] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - Filter Operator [FIL_52] (rows=1 width=#Masked#) + Filter Operator [FIL_48] (rows=1 width=#Masked#) predicate:(_col24 is null and _col47 is null and _col36 is null) - Please refer to the previous Select Operator [SEL_49] + Please refer to the previous Select Operator [SEL_45] Reducer 4 vectorized - File Output Operator [FS_70] + File Output Operator [FS_66] table:{"name:":"default.merge_tmp_table"} - Select Operator [SEL_69] (rows=1 width=#Masked#) + Select Operator [SEL_65] (rows=1 width=#Masked#) Output:["_col0"] - Filter Operator [FIL_68] (rows=1 width=#Masked#) + Filter Operator [FIL_64] (rows=1 width=#Masked#) predicate:(_col4 > 1L) - Group By Operator [GBY_67] (rows=1 width=#Masked#) + Group By Operator [GBY_63] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_62] + SHUFFLE [RS_58] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_61] (rows=1 width=#Masked#) + Group By Operator [GBY_57] (rows=1 width=#Masked#) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count()"],keys:_col4, _col40, _col6, _col45 - Select Operator [SEL_57] (rows=1 width=#Masked#) + Select Operator [SEL_53] (rows=1 width=#Masked#) Output:["_col4","_col6","_col40","_col45"] - Filter Operator [FIL_53] (rows=1 width=#Masked#) + Filter Operator [FIL_49] (rows=1 width=#Masked#) predicate:((_col24 = _col34) and (_col47 = _col37) and (_col36 = 2451181) and (floor((_col34 / 1000)) * 1000) BETWEEN 1000 AND 2000 and (_col30 < 0)) - Please refer to the previous Select Operator [SEL_49] + Please refer to the previous Select Operator [SEL_45] Stage-7 Stats Work{} Stage-3 diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out index cd0ce562a725..36b5b988c492 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out @@ -757,10 +757,10 @@ PREHOOK: Input: default@tbl_ice_mixed_parted POSTHOOK: query: describe tbl_ice_mixed_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_mixed_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out index fdf2679f5b2a..0b7260d84206 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_orc.q.out @@ -526,10 +526,10 @@ PREHOOK: Input: default@tbl_ice_orc_parted POSTHOOK: query: describe tbl_ice_orc_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_orc_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out index acc7794e12ce..675262df24d7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out @@ -430,10 +430,10 @@ PREHOOK: Input: default@tbl_ice_parquet_parted POSTHOOK: query: describe tbl_ice_parquet_parted POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_parquet_parted -p1 string b string a int -p2 string +p1 string Transform: identity +p2 string Transform: identity # Partition Information # col_name data_type comment diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index e74885e57a3d..67e59155b67c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -31,7 +31,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutorService; @@ -232,7 +231,7 @@ static VectorizedRowBatchCtx createFakeVrbCtx(MapWork mapWork) throws HiveExcept if (paths.hasNext()) { PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next()); if (partDesc != null) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && !partSpec.isEmpty()) { partitionColumnCount = partSpec.size(); } diff --git a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java index 120949fc9949..f4a008257bb8 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/ProactiveEviction.java @@ -158,13 +158,13 @@ public static final class Request { // Holds a hierarchical structure of DBs, tables and partitions such as: // { testdb : { testtab0 : [], testtab1 : [ {pk0 : p0v0, pk1 : p0v1}, {pk0 : p1v0, pk1 : p1v1} ] }, testdb2 : {} } - private final Map>>> entities; + private final Map>>> entities; - private Request(Map>>> entities) { + private Request(Map>>> entities) { this.entities = entities; } - public Map>>> getEntities() { + public Map>>> getEntities() { return entities; } @@ -191,21 +191,21 @@ public List toProtoRequests() List protoRequests = new LinkedList<>(); - for (Map.Entry>>> dbEntry : entities.entrySet()) { + for (Map.Entry>>> dbEntry : entities.entrySet()) { String dbName = dbEntry.getKey(); - Map>> tables = dbEntry.getValue(); + Map>> tables = dbEntry.getValue(); LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = null; requestBuilder.setDbName(dbName.toLowerCase()); - for (Map.Entry>> tableEntry : tables.entrySet()) { + for (Map.Entry>> tableEntry : tables.entrySet()) { String tableName = tableEntry.getKey(); tableBuilder = LlapDaemonProtocolProtos.TableProto.newBuilder(); tableBuilder.setTableName(tableName.toLowerCase()); - Set> partitions = tableEntry.getValue(); + Set> partitions = tableEntry.getValue(); Set partitionKeys = null; for (Map partitionSpec : partitions) { @@ -245,7 +245,7 @@ public boolean isTagMatch(CacheTag cacheTag) { return false; } - Map>> tables = entities.get(db); + Map>> tables = entities.get(db); // If true, must be a drop DB event and this cacheTag matches. if (tables.isEmpty()) { @@ -261,7 +261,7 @@ public boolean isTagMatch(CacheTag cacheTag) { for (String tableAndDbName : tables.keySet()) { if (tableAndDbName.equals(tagTableName.getNotEmptyDbTable())) { - Set> partDescs = tables.get(tableAndDbName); + Set> partDescs = tables.get(tableAndDbName); // If true, must be a drop table event, and this cacheTag matches. if (partDescs == null) { @@ -292,7 +292,7 @@ public String toString() { */ public static final class Builder { - private final Map>>> entities; + private final Map>>> entities; private Builder() { this.entities = new HashMap<>(); @@ -302,7 +302,7 @@ public static Builder create() { return new Builder(); } - public Builder addPartitionOfATable(String db, String tableName, LinkedHashMap partSpec) { + public Builder addPartitionOfATable(String db, String tableName, Map partSpec) { ensureDb(db); ensureTable(db, tableName); entities.get(db).get(tableName).add(partSpec); @@ -325,7 +325,7 @@ public Request build() { } private void ensureDb(String dbName) { - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); if (tables == null) { tables = new HashMap<>(); entities.put(dbName, tables); @@ -334,9 +334,9 @@ private void ensureDb(String dbName) { private void ensureTable(String dbName, String tableName) { ensureDb(dbName); - Map>> tables = entities.get(dbName); + Map>> tables = entities.get(dbName); - Set> partitions = tables.get(tableName); + Set> partitions = tables.get(tableName); if (partitions == null) { partitions = new HashSet<>(); tables.put(tableName, partitions); @@ -352,7 +352,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entities.clear(); String dbName = protoRequest.getDbName().toLowerCase(); - Map>> entitiesInDb = new HashMap<>(); + Map>> entitiesInDb = new HashMap<>(); List tables = protoRequest.getTableList(); if (tables != null && !tables.isEmpty()) { @@ -364,7 +364,7 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entitiesInDb.put(dbAndTableName, null); continue; } - Set> partitions = new HashSet<>(); + Set> partitions = new HashSet<>(); LinkedHashMap partDesc = new LinkedHashMap<>(); for (int valIx = 0; valIx < table.getPartValCount(); ++valIx) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java index 17a964a44583..6d543351f21c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AlterTableUtils.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.ddl.table; +import java.util.ArrayList; +import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -75,7 +78,7 @@ public static boolean isSchemaEvolutionEnabled(Table table, Configuration conf) } public static boolean isFullPartitionSpec(Table table, Map partitionSpec) { - for (FieldSchema partitionCol : table.getPartCols()) { + for (FieldSchema partitionCol : table.getPartitionKeys()) { if (partitionSpec.get(partitionCol.getName()) == null) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 289479b7ee79..87d115df72ed 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -66,10 +66,7 @@ private List getColumnsByPattern() throws HiveException { private List getCols() throws HiveException { Table table = context.getDb().getTable(desc.getTableName()); - List allColumns = new ArrayList<>(); - allColumns.addAll(table.getCols()); - allColumns.addAll(table.getPartCols()); - return allColumns; + return new ArrayList<>(table.getAllCols()); } private Matcher getMatcher() { @@ -94,13 +91,7 @@ private List filterColumns(List columns, Matcher match } if (desc.isSorted()) { - result.sort( - new Comparator() { - @Override - public int compare(FieldSchema f1, FieldSchema f2) { - return f1.getName().compareTo(f2.getName()); - } - }); + result.sort(Comparator.comparing(FieldSchema::getName)); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java index b3d37fb9c40a..3aadf916a470 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableOperation.java @@ -161,7 +161,7 @@ private void createTableNonReplaceMode(Table tbl) throws HiveException { if (desc.isCTAS()) { Table createdTable = context.getDb().getTable(tbl.getDbName(), tbl.getTableName()); DataContainer dc = new DataContainer(createdTable.getTTable()); - context.getQueryState().getLineageState().setLineage(createdTable.getPath(), dc, createdTable.getCols()); + context.getQueryState().getLineageState().setLineage(createdTable.getPath(), dc, createdTable.getStorageSchemaCols()); // We did not create the table before moving the data files for a non-partitioned table i.e // we used load file instead of load table (see SemanticAnalyzer#getFileSinkPlan() for diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index 9086ad90c677..8d45892e17fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -200,7 +200,7 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List tableProps = table.getParameters() == null ? new HashMap<>() : table.getParameters(); - if (partitionCol != null) { + if (partitionCol != null && !table.hasNonNativePartitionSupport()) { addStatsForPartitionKeyColumn(table, colStats, colNames, tableProps, partitionCol); } else { addStatsForRegularColumn(table, colStats, colName, tableProps); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java index 75f39291cd07..dc6c3680ca1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/formatter/TextDescTableFormatter.java @@ -59,7 +59,6 @@ import java.io.DataOutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; @@ -70,7 +69,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.Map.Entry; -import java.util.stream.Collectors; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; import static org.apache.hadoop.hive.ql.ddl.ShowUtils.ALIGNMENT; @@ -171,13 +169,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column boolean isFormatted, boolean isOutputPadded) throws IOException { String partitionData = ""; if (columnPath == null) { - List partitionColumns = null; - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - if (table.isPartitioned()) { - partitionColumns = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : - table.getPartCols(); - } + List partitionColumns = table.isPartitioned() ? table.getPartCols() : null; if (CollectionUtils.isNotEmpty(partitionColumns) && conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { TextMetaDataTable metaDataTable = new TextMetaDataTable(); @@ -204,13 +196,9 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column } private void addFormattedTableData(DataOutputStream out, Table table, Partition partition, boolean isOutputPadded) - throws IOException, UnsupportedEncodingException { - String formattedTableInfo = null; - if (partition != null) { - formattedTableInfo = getPartitionInformation(table, partition); - } else { - formattedTableInfo = getTableInformation(table, isOutputPadded); - } + throws IOException { + String formattedTableInfo = (partition != null) ? getPartitionInformation(table, partition) : + getTableInformation(table, isOutputPadded); if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { formattedTableInfo += getConstraintsInformation(table); @@ -337,7 +325,7 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List skewedCoumnNames = storageDesc.getSkewedInfo().getSkewedColNames().stream() .sorted() - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Columns:", skewedCoumnNames.toString(), tableInfo); } @@ -345,16 +333,16 @@ private void getStorageDescriptorInfo(StringBuilder tableInfo, Table table, Stor List> skewedColumnValues = storageDesc.getSkewedInfo().getSkewedColValues().stream() .sorted(new VectorComparator()) - .collect(Collectors.toList()); + .toList(); formatOutput("Skewed Values:", skewedColumnValues.toString(), tableInfo); } - Map, String> skewedColMap = new TreeMap<>(new VectorComparator()); + Map, String> skewedColMap = new TreeMap<>(new VectorComparator<>()); skewedColMap.putAll(storageDesc.getSkewedInfo().getSkewedColValueLocationMaps()); if (MapUtils.isNotEmpty(skewedColMap)) { formatOutput("Skewed Value to Path:", skewedColMap.toString(), tableInfo); Map, String> truncatedSkewedColMap = - new TreeMap, String>(new VectorComparator()); + new TreeMap<>(new VectorComparator<>()); // walk through existing map to truncate path so that test won't mask it then we can verify location is right Set, String>> entries = skewedColMap.entrySet(); for (Entry, String> entry : entries) { @@ -403,7 +391,7 @@ private void getPartitionMetaDataInformation(StringBuilder tableInfo, Partition } } - private class VectorComparator> implements Comparator>{ + private static final class VectorComparator> implements Comparator> { @Override public int compare(List listA, List listB) { for (int i = 0; i < listA.size() && i < listB.size(); i++) { @@ -438,7 +426,7 @@ private void displayAllParameters(Map params, StringBuilder tabl private void displayAllParameters(Map params, StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) { - List keys = new ArrayList(params.keySet()); + List keys = new ArrayList<>(params.keySet()); Collections.sort(keys); for (String key : keys) { String value = params.get(key); @@ -626,7 +614,7 @@ private void addExtendedTableData(DataOutputStream out, Table table, Partition p } private void addExtendedConstraintData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getTableConstraintsInfo().isTableConstraintsInfoNotEmpty()) { out.write(("Constraints").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.tabCode); @@ -658,7 +646,7 @@ private void addExtendedConstraintData(DataOutputStream out, Table table) } private void addExtendedStorageData(DataOutputStream out, Table table) - throws IOException, UnsupportedEncodingException { + throws IOException { if (table.getStorageHandlerInfo() != null) { out.write(("StorageHandlerInfo").getBytes(StandardCharsets.UTF_8)); out.write(Utilities.newLineCode); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java index db7a5dfcd3d0..5882e4616506 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/PartitionUtils.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Map.Entry; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java index b5bb11bcea2a..447ac10aeb22 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/compact/AlterTableCompactAnalyzer.java @@ -99,7 +99,7 @@ protected void analyzeCommand(TableName tableName, Map partition Table table; try { table = getDb().getTable(tableName); - for (FieldSchema fs : table.getCols()) { + for (FieldSchema fs : table.getStorageSchemaCols()) { TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); rwsch.put(tableName.getTable(), fs.getName(), new ColumnInfo(fs.getName(), columnType, null, true)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index ebe8f2f52775..e333ed85f439 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -24,7 +24,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -254,7 +253,7 @@ public static String getPartialName(Partition p, int level) throws HiveException * @throws HiveException */ public static String conflictingArchiveNameOrNull(Hive db, Table tbl, - LinkedHashMap partSpec) + Map partSpec) throws HiveException { List partKeys = tbl.getPartitionKeys(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index a5bc66733f46..3487dc443db1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -38,7 +38,6 @@ import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; @@ -910,7 +909,7 @@ private String getExternal(Table table) { private String getColumns(Table table) { List columnDescs = new ArrayList<>(); - for (FieldSchema column : table.getCols()) { + for (FieldSchema column : table.getStorageSchemaCols()) { String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())); String columnDesc = " " + unparseIdentifier(column.getName()) + " " + columnType; if (column.getComment() != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index a0906cfb0339..38c4dfb036b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.LinkedHashMap; import java.util.Map; import java.util.stream.IntStream; @@ -287,7 +286,7 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, Object[] partitionValues) { - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); for (int i = 0; i < vrbCtx.partitionColumnCount; i++) { Object objectValue; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java index c188eb09fdcf..9f871d05feb3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/DummyPartition.java @@ -90,10 +90,7 @@ public List getValues() { Table table = this.getTable(); values = new ArrayList<>(); - // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation - for (FieldSchema fs : table.hasNonNativePartitionSupport() - ? table.getStorageHandler().getPartitionKeys(table) - : table.getPartCols()) { + for (FieldSchema fs : table.getPartCols()) { String val = partSpec.get(fs.getName()); values.add(val); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index 0cf02a95e392..ee55fde100f6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -430,10 +429,10 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable // 1.2 Add column info corresponding to partition columns ArrayList partitionColumns = new ArrayList(); - for (FieldSchema part_col : viewTable.getPartCols()) { - colName = part_col.getName(); + for (FieldSchema partCol : viewTable.getPartCols()) { + colName = partCol.getName(); colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), null, true); + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), null, true); rr.put(null, colName, colInfo); cInfoLst.add(colInfo); partitionColumns.add(colInfo); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 736e6e8c9f1a..4715775d3b4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -124,7 +123,7 @@ public Partition(Table tbl, Map partSpec, Path location) throws public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject( Table tbl, Map partSpec, Path location) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : tbl.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null || val.isEmpty()) { @@ -173,7 +172,8 @@ protected void initialize(Table table, // set default if location is not set and this is a physical // table partition (not a view partition) if (table.getDataLocation() != null) { - Path partPath = new Path(table.getDataLocation(), Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); + Path partPath = new Path(table.getDataLocation(), + Warehouse.makePartName(table.getPartCols(), tPartition.getValues())); tPartition.getSd().setLocation(partPath.toString()); } } @@ -416,7 +416,7 @@ public Path[] getPath(Sample s) throws HiveException { } int scount = s.getSampleFraction(); - ArrayList ret = new ArrayList(); + List ret = new ArrayList<>(); if (bcount == scount) { ret.add(getBucketPath(s.getSampleNum() - 1)); @@ -428,7 +428,7 @@ public Path[] getPath(Sample s) throws HiveException { } // undersampling a bucket ret.add(getBucketPath((s.getSampleNum() - 1) % bcount)); - } else if (bcount > scount) { + } else { if ((bcount / scount) * scount != bcount) { throw new HiveException("Sample Count" + scount + " is not a divisor of bucket count " + bcount + " for table " @@ -439,11 +439,11 @@ public Path[] getPath(Sample s) throws HiveException { ret.add(getBucketPath(i * scount + (s.getSampleNum() - 1))); } } - return (ret.toArray(new Path[ret.size()])); + return (ret.toArray(new Path[0])); } } - public LinkedHashMap getSpec() { + public Map getSpec() { return table.createSpec(tPartition); } @@ -542,7 +542,7 @@ public void setLocation(String location) { */ public void setValues(Map partSpec) throws HiveException { - List pvals = new ArrayList(); + List pvals = new ArrayList<>(); for (FieldSchema field : table.getPartCols()) { String val = partSpec.get(field.getName()); if (val == null) { @@ -582,12 +582,11 @@ public List getSkewedColNames() { return tPartition.getSd().getSkewedInfo().getSkewedColNames(); } - public void setSkewedValueLocationMap(List valList, String dirName) - throws HiveException { + public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tPartition.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tPartition.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -612,8 +611,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { - if (obj instanceof Partition) { - Partition o = (Partition) obj; + if (obj instanceof Partition o) { return Objects.equals(tPartition, o.tPartition); } return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 05faa38d7caf..54cf9ac80e7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata; import java.io.IOException; +import java.io.Serial; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -32,10 +33,12 @@ import java.util.Properties; import java.util.Set; import java.util.stream.Collectors; + import java.util.stream.Stream; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -48,6 +51,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.api.SourceTable; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -101,6 +105,7 @@ */ public class Table implements Serializable { + @Serial private static final long serialVersionUID = 1L; static final private Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Table"); @@ -110,6 +115,8 @@ public class Table implements Serializable { /** * These fields are all cached fields. The information comes from tTable. */ + private List tablePartCols; + private Map> inputColnameToIndFsMap; private transient Deserializer deserializer; private Class outputFormatClass; private Class inputFormatClass; @@ -194,6 +201,9 @@ public Table makeCopy() { newTab.setMetaTable(this.getMetaTable()); newTab.setSnapshotRef(this.getSnapshotRef()); + if (this.tablePartCols != null) { + newTab.tablePartCols = new ArrayList<>(this.tablePartCols); + } return newTab; } @@ -226,11 +236,11 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { { sd.setSerdeInfo(new SerDeInfo()); sd.setNumBuckets(-1); - sd.setBucketCols(new ArrayList()); - sd.setCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.setSortCols(new ArrayList()); - sd.getSerdeInfo().setParameters(new HashMap()); + sd.setBucketCols(new ArrayList<>()); + sd.setCols(new ArrayList<>()); + sd.setParameters(new HashMap<>()); + sd.setSortCols(new ArrayList<>()); + sd.getSerdeInfo().setParameters(new HashMap<>()); // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does // not support a table with no columns. sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName()); @@ -240,17 +250,17 @@ public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) { sd.setInputFormat(SequenceFileInputFormat.class.getName()); sd.setOutputFormat(HiveSequenceFileOutputFormat.class.getName()); SkewedInfo skewInfo = new SkewedInfo(); - skewInfo.setSkewedColNames(new ArrayList()); - skewInfo.setSkewedColValues(new ArrayList>()); - skewInfo.setSkewedColValueLocationMaps(new HashMap, String>()); + skewInfo.setSkewedColNames(new ArrayList<>()); + skewInfo.setSkewedColValues(new ArrayList<>()); + skewInfo.setSkewedColValueLocationMaps(new HashMap<>()); sd.setSkewedInfo(skewInfo); } org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table(); { t.setSd(sd); - t.setPartitionKeys(new ArrayList()); - t.setParameters(new HashMap()); + t.setPartitionKeys(new ArrayList<>()); + t.setParameters(new HashMap<>()); t.setTableType(TableType.MANAGED_TABLE.toString()); t.setDbName(databaseName); t.setTableName(tableName); @@ -403,7 +413,7 @@ public void setStorageHandlerInfo(StorageHandlerInfo storageHandlerInfo) { this.storageHandlerInfo = storageHandlerInfo; } - final public Class getInputFormatClass() { + public final Class getInputFormatClass() { if (inputFormatClass == null) { try { String className = tTable.getSd().getInputFormat(); @@ -423,7 +433,7 @@ final public Class getInputFormatClass() { return inputFormatClass; } - final public Class getOutputFormatClass() { + public final Class getOutputFormatClass() { if (outputFormatClass == null) { try { String className = tTable.getSd().getOutputFormat(); @@ -457,7 +467,7 @@ public void setMaterializedTable(boolean materializedTable) { * Marker SemanticException, so that processing that allows for table validation failures * and appropriately handles them can recover from these types of SemanticExceptions */ - public class ValidationFailureSemanticException extends SemanticException{ + public static class ValidationFailureSemanticException extends SemanticException{ public ValidationFailureSemanticException(String s) { super(s); } @@ -527,9 +537,9 @@ public TableType getTableType() { return Enum.valueOf(TableType.class, tTable.getTableType()); } - public ArrayList getFields() { + public List getFields() { - ArrayList fields = new ArrayList(); + List fields = new ArrayList<>(); try { Deserializer decoder = getDeserializer(); @@ -595,26 +605,47 @@ public boolean equals(Object obj) { && Objects.equals(snapshotRef, other.snapshotRef); } + + /** + * Returns partition columns, consulting the storage handler for non-native tables (e.g. Iceberg) + * where partition columns are not stored in the metastore. + */ public List getPartCols() { - List partKeys = tTable.getPartitionKeys(); - if (partKeys == null) { - partKeys = new ArrayList<>(); - tTable.setPartitionKeys(partKeys); + if (tablePartCols != null) { + return tablePartCols; + } + if (isTableTypeSet() && hasNonNativePartitionSupport()) { + List partCols = getStorageHandler().getPartitionKeys(this); + for (FieldSchema partCol : partCols) { + FieldSchema storageSchemaField = getFieldSchemaByName(partCol.getName()); + String storageSchemaComment = storageSchemaField.getComment(); + if (storageSchemaComment != null) { + partCol.setComment(storageSchemaComment); + } + } + tablePartCols = partCols; + } else { + tablePartCols = getPartitionKeys(); } - return partKeys; + return tablePartCols; + } + + private boolean isTableTypeSet() { + if (tTable.getParameters() == null) { + return false; + } + String tableType = tTable.getParameters().get(HiveMetaHook.TABLE_TYPE); + return tableType != null; } public FieldSchema getPartColByName(String colName) { return getPartCols().stream() - .filter(key -> key.getName().toLowerCase().equals(colName)) - .findFirst().orElse(null); + .filter(key -> key.getName().toLowerCase().equals(colName)) + .findFirst().orElse(null); } public List getPartColNames() { - List partCols = hasNonNativePartitionSupport() ? - getStorageHandler().getPartitionKeys(this) : getPartCols(); - return partCols.stream().map(FieldSchema::getName) - .collect(Collectors.toList()); + return getPartCols().stream().map(FieldSchema::getName).toList(); } public boolean hasNonNativePartitionSupport() { @@ -672,7 +703,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { Map, String> mappings = tTable.getSd().getSkewedInfo() .getSkewedColValueLocationMaps(); if (null == mappings) { - mappings = new HashMap, String>(); + mappings = new HashMap<>(); tTable.getSd().getSkewedInfo().setSkewedColValueLocationMaps(mappings); } @@ -682,7 +713,7 @@ public void setSkewedValueLocationMap(List valList, String dirName) { public Map, String> getSkewedColValueLocationMaps() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValueLocationMaps() : new HashMap, String>(); + .getSkewedColValueLocationMaps() : new HashMap<>(); } public void setSkewedColValues(List> skewedValues) { @@ -691,7 +722,7 @@ public void setSkewedColValues(List> skewedValues) { public List> getSkewedColValues(){ return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColValues() : new ArrayList>(); + .getSkewedColValues() : new ArrayList<>(); } public void setSkewedColNames(List skewedColNames) { @@ -700,7 +731,7 @@ public void setSkewedColNames(List skewedColNames) { public List getSkewedColNames() { return (tTable.getSd().getSkewedInfo() != null) ? tTable.getSd().getSkewedInfo() - .getSkewedColNames() : new ArrayList(); + .getSkewedColNames() : new ArrayList<>(); } public SkewedInfo getSkewedInfo() { @@ -728,10 +759,50 @@ private boolean isField(String col) { return false; } - public List getCols() { + private void fillColumnIndexByName() { + inputColnameToIndFsMap = new HashMap<>(); + List fsList = new ArrayList<>(getColsInternal(false)); + if (!isNonNative()) { + fsList.addAll(getPartitionKeys()); + } + for (int i = 0; i < fsList.size(); i++) { + inputColnameToIndFsMap.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); + } + } + + public int getColumnIndexByName(String colName) { + if (inputColnameToIndFsMap == null) { + fillColumnIndexByName(); + } + return inputColnameToIndFsMap.get(colName.toLowerCase()).getLeft(); + } + + public FieldSchema getFieldSchemaByName(String colName) { + if (inputColnameToIndFsMap == null) { + fillColumnIndexByName(); + } + return inputColnameToIndFsMap.get(colName).getRight(); + } + + public List getStorageSchemaCols() { return getColsInternal(false); } + public List getCols() { + if (!isNonNative()) { + return getColsInternal(false); + } else { + List nonPartFields = new ArrayList<>(); + Set partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); + for (FieldSchema field : getColsInternal(false)) { + if (!partFieldsName.contains(field.getName())) { + nonPartFields.add(field); + } + } + return nonPartFields; + } + } + public List getColsForMetastore() { return getColsInternal(true); } @@ -762,10 +833,11 @@ private List getColsInternal(boolean forMs) { * @return List<FieldSchema> */ public List getAllCols() { - ArrayList f_list = new ArrayList(); - f_list.addAll(getCols()); - f_list.addAll(getPartCols()); - return f_list; + List fsList = new ArrayList<>(getColsInternal(false)); + if (!isNonNative()) { + fsList.addAll(getPartitionKeys()); + } + return fsList; } public void setPartCols(List partCols) { @@ -813,7 +885,7 @@ public void setOutputFormatClass(String name) throws HiveException { } public boolean isPartitioned() { - return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : + return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : CollectionUtils.isNotEmpty(getPartCols()); } @@ -916,6 +988,9 @@ public void setDbName(String databaseName) { } public List getPartitionKeys() { + if (tTable.getPartitionKeys() == null) { + tTable.setPartitionKeys(new ArrayList<>()); + } return tTable.getPartitionKeys(); } @@ -1009,12 +1084,12 @@ public boolean isMaterializedView() { * Use the information from this partition. * @return Partition name to value mapping. */ - public LinkedHashMap createSpec( + public Map createSpec( org.apache.hadoop.hive.metastore.api.Partition tp) { List fsl = getPartCols(); List tpl = tp.getValues(); - LinkedHashMap spec = new LinkedHashMap(fsl.size()); + Map spec = LinkedHashMap.newLinkedHashMap(fsl.size()); for (int i = 0; i < fsl.size(); i++) { FieldSchema fs = fsl.get(i); String value = tpl.get(i); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 3d3e4ce7663f..0e914843e2e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -807,8 +807,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, for (FieldNode col : cols) { int index = originalOutputColumnNames.indexOf(col.getFieldName()); Table tab = cppCtx.getParseContext().getViewProjectToTableSchema().get(op); - List fullFieldList = new ArrayList(tab.getCols()); - fullFieldList.addAll(tab.getPartCols()); + List fullFieldList = new ArrayList<>(tab.getAllCols()); cppCtx.getParseContext().getColumnAccessInfo() .add(tab.getCompleteName(), fullFieldList.get(index).getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index dba737e382c0..46b3ff129f49 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -169,7 +170,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje Table table = ts.getConf().getTableMetadata(); boolean nonEquiJoin = isNonEquiJoin(ctx.parent); - if (table != null && table.isPartitionKey(column) && !nonEquiJoin) { + // Non-native tables (e.g. Iceberg) share a single table location for all partitions, so + // path-based DPP cannot work; prefer split-level pruning when the storage handler supports it. + if (table != null && table.isNonNative() && !nonEquiJoin + && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { + String columnType = table.getFieldSchemaByName(column).getType(); + generateEventOperatorPlan(ctx, parseContext, ts, column, columnType, ctx.parent); + } else if (table != null && table.isPartitionKey(column) && !nonEquiJoin + && !table.hasNonNativePartitionSupport()) { String columnType = table.getPartColByName(column).getType(); String alias = ts.getConf().getAlias(); PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts); @@ -191,11 +199,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje // all partitions have been statically removed LOG.debug("No partition pruning necessary."); } - } else if (table.isNonNative() && - table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) { - generateEventOperatorPlan(ctx, parseContext, ts, column, - table.getCols().stream().filter(e -> e.getName().equals(column)). - map(e -> e.getType()).findFirst().get(), ctx.parent); } else { // semijoin LOG.debug("Column " + column + " is not a partition column"); if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f4b4c2ff3bad..82f81861a4dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1767,7 +1767,7 @@ private ImmutablePair validateInputFormatAndSchemaEvolution(Ma * allColumnNameList and allTypeInfoList variables -- into the data and partition columns. */ - LinkedHashMap partSpec = partDesc.getPartSpec(); + Map partSpec = partDesc.getPartSpec(); if (partSpec != null && partSpec.size() > 0) { partitionColumnCount = partSpec.size(); dataColumnCount = dataAndPartColumnCount - partitionColumnCount; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index 91340b1b76ef..d08fe92208ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.ppr; import java.util.ArrayList; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -51,7 +50,7 @@ public class PartExprEvalUtils { * @throws HiveException */ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws HiveException { - LinkedHashMap partSpec = p.getSpec(); + Map partSpec = p.getSpec(); Properties partProps = p.getSchema(); String[] partKeyTypes; @@ -59,8 +58,8 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv if (!partSpec.keySet().containsAll(expr.getCols())) { return null; } - partKeyTypes = p.getTable().getStorageHandler().getPartitionKeys(p.getTable()).stream() - .map(FieldSchema::getType).toArray(String[]::new); + partKeyTypes = p.getTable().getPartCols().stream().map(FieldSchema::getType) + .toArray(String[]::new); } else { String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); partKeyTypes = pcolTypes.trim().split(":"); @@ -104,7 +103,7 @@ public static Pair prepareExpr( ExprNodeDesc expr, List partColumnNames, List partColumnTypeInfos) throws HiveException { // Create the row object - List partObjectInspectors = new ArrayList(); + List partObjectInspectors = new ArrayList<>(); for (int i = 0; i < partColumnNames.size(); i++) { partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( partColumnTypeInfos.get(i))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java index 06912a1b3226..05f3b85f271f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AcidExportSemanticAnalyzer.java @@ -175,7 +175,8 @@ private void analyzeAcidExport(ASTNode ast, Table exportTable, ASTNode tokRefOrN //now generate insert statement //insert into newTableName select * from ts StringBuilder rewrittenQueryStr = generateExportQuery( - newTable.getPartCols(), tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); + newTable.getPartCols(), + tokRefOrNameExportTable, (ASTNode) tokRefOrNameExportTable.parent, newTableName); ReparseResult rr = ParseUtils.parseRewrittenQuery(ctx, rewrittenQueryStr); Context rewrittenCtx = rr.rewrittenCtx; rewrittenCtx.setIsUpdateDeleteMerge(false); //it's set in parseRewrittenQuery() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index ffc1673f3670..d759a91372c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -353,6 +353,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -3013,10 +3014,10 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tabMetaData.getPartCols()) { - colName = part_col.getName(); + for (FieldSchema partCol : tabMetaData.getPartitionKeys()) { + colName = partCol.getName(); colInfo = new ColumnInfo(colName, - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), isNullable(colName, nnc, pkc), tableAlias, true); rr.put(tableAlias, colName, colInfo); cInfoLst.add(colInfo); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java index 9109f9cb6086..ff851577611f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java @@ -262,8 +262,10 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator entry : partSpec.entrySet()) { + if (entry.getValue() == null) { + dynPartsCount++; + } + } + } + boolean inputRRHasStaticParts = (this.columns.size() + dynPartsCount < columns.size()); // if there is any partition column (in static partition or dynamic // partition or mixed case) - int dynamicPartBegin = -1; + int dynamicPartBegin = 0; for (int i = 0; i < partitionColumns.size(); i++) { ExprNodeDesc exprNodeDesc; TypeInfo srcType; String partColName = partitionColumns.get(i).getName(); + int index = tbl.getColumnIndexByName(partColName); + ColumnInfo col; + // 2. deal with static partition columns if (partSpec != null && partSpec.containsKey(partColName) - && partSpec.get(partColName) != null) { + && partSpec.get(partColName) != null ) { if (dynamicPartBegin > 0) { throw new SemanticException( "Dynamic partition columns should not come before static partition columns."); } - exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName)); + if (inputRRHasStaticParts) { + col = columns.get(index + dynamicPartBegin); + exprNodeDesc = new ExprNodeColumnDesc(col); + } else { + exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName)); + } srcType = exprNodeDesc.getTypeInfo(); + if (!inputRRHasStaticParts) { + dynamicPartBegin--; + } } // 3. dynamic partition columns else { - dynamicPartBegin++; - ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin); + col = columns.get(index + dynamicPartBegin); exprNodeDesc = new ExprNodeColumnDesc(col); srcType = col.getType(); - } - TypeInfo destType = selRSSig.get(this.columns.size() + i).getType(); + TypeInfo destType = selRSSig.get(index).getType(); if (!srcType.equals(destType)) { // This may be possible when srcType is string but destType is integer exprNodeDesc = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() .createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType); } colList.add(exprNodeDesc); - String internalName = selRS.getColumnNames().get(this.columns.size() + i); + String internalName = selRS.getColumnNames().get(index); columnNames.add(internalName); columnExprMap.put(internalName, exprNodeDesc); - signature.add(selRSSig.get(this.columns.size() + i)); + signature.add(selRSSig.get(index)); } operator.setConf(new SelectDesc(colList, columnNames)); operator.setColumnExprMap(columnExprMap); @@ -317,7 +337,7 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator columnNameToIndex) { + private Integer getSelRSColumnIndex(String columnName, ColumnInfo col, Map columnNameToIndex) { ObjectInspector objectInspector = col.getObjectInspector(); if (objectInspector == null) { return null; @@ -326,7 +346,7 @@ private Integer getSelRSColumnIndex(int i, ColumnInfo col, Map if (!columnSupported) { return null; } - return columnNameToIndex.get(this.columns.get(i).getName()); + return columnNameToIndex.get(columnName); } public String getCompleteName() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index e23e54aa5230..622e28210f94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -107,7 +107,7 @@ private boolean shouldRewrite(ASTNode tree) { */ private static FieldSchemas getStatsEligibleFieldSchemas(Table tbl) { List result = new ArrayList<>(); - for (FieldSchema col : tbl.getCols()) { + for (FieldSchema col : tbl.getStorageSchemaCols()) { String type = col.getType(); TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); boolean isSupported = ColumnStatsAutoGatherContext.isColumnSupported(typeInfo.getCategory(), () -> typeInfo); @@ -207,8 +207,7 @@ private static CharSequence genPartitionClause(Table tbl, List pa private static String getColTypeOf(Table tbl, String partKey) { - for (FieldSchema fs : tbl.hasNonNativePartitionSupport() ? - tbl.getStorageHandler().getPartitionKeys(tbl) : tbl.getPartitionKeys()) { + for (FieldSchema fs : tbl.getPartCols()) { if (partKey.equalsIgnoreCase(fs.getName())) { return fs.getType().toLowerCase(); } @@ -231,12 +230,12 @@ protected static List getFieldSchemasByColName(Table tbl, List col); } List result = new ArrayList<>(); - List tableColNames = new FieldSchemas(tbl.getCols()).getColName(); + List tableColNames = new FieldSchemas(tbl.getStorageSchemaCols()).getColName(); for (String colName : colNames) { FieldSchema fs = specifiedColsMap.get(colName.toLowerCase()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 4d4956fbec13..dcf197a2c201 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.ReplChangeManager; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.QueryState; @@ -89,7 +88,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.TreeMap; /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index eb4a73f1e5e9..9093570706f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -561,7 +561,7 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc } rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0)))); - addPartitionColsToInsert(table.getPartCols(), inpPartSpec, rewrittenQueryStr); + addPartitionColsToInsert(parts, inpPartSpec, rewrittenQueryStr); rewrittenQueryStr.append(" select * from "); rewrittenQueryStr.append(tempTblName); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java index 882840ffef5a..c93b4a35a79d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.rewrite.MergeStatement; import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import java.util.ArrayList; import java.util.HashMap; @@ -230,7 +229,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause String deleteExtraPredicate) throws SemanticException { assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED; assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE; - Map newValuesMap = new HashMap<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + Map newValuesMap = HashMap.newHashMap(targetTable.getAllCols().size()); ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0); //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions //before re-parsing, i.e. they are known to SemanticAnalyzer logic @@ -240,7 +239,7 @@ private MergeStatement.UpdateClause handleUpdate(ASTNode whenMatchedUpdateClause //insert into target (p1) select current_date(), 5, c3, p1 where .... //since we take the RHS of set exactly as it was in Input, we don't need to deal with quoting/escaping column/table //names - List nonPartCols = targetTable.getCols(); + List nonPartCols = targetTable.getStorageSchemaCols(); Map colNameToDefaultConstraint = getColNameToDefaultValueMap(targetTable); for (FieldSchema fs : nonPartCols) { String name = fs.getName(); @@ -303,7 +302,7 @@ private List findWhenClauses(ASTNode tree, int start) throws SemanticEx "Unexpected node type found: " + whenClause.getType() + addParseInfo(whenClause); whenClauses.add(whenClause); } - if (whenClauses.size() <= 0) { + if (whenClauses.isEmpty()) { //Futureproofing: the parser will actually not allow this throw new SemanticException("Must have at least 1 WHEN clause in MERGE statement"); } @@ -431,8 +430,7 @@ private static final class OnClauseAnalyzer { OnClauseAnalyzer(ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery, HiveConf conf, String onClauseAsString) { this.onClause = onClause; - allTargetTableColumns.addAll(targetTable.getCols()); - allTargetTableColumns.addAll(targetTable.getPartCols()); + allTargetTableColumns.addAll(targetTable.getAllCols()); this.targetTableNameInSourceQuery = unescapeIdentifier(targetTableNameInSourceQuery); this.conf = conf; this.onClauseAsString = onClauseAsString; @@ -499,11 +497,7 @@ private void handleUnresolvedColumns() { private void addColumn2Table(String tableName, String columnName) { tableName = tableName.toLowerCase(); //normalize name for mapping tableNamesFound.add(tableName); - List cols = table2column.get(tableName); - if (cols == null) { - cols = new ArrayList<>(); - table2column.put(tableName, cols); - } + List cols = table2column.computeIfAbsent(tableName, k -> new ArrayList<>()); //we want to preserve 'columnName' as it was in original input query so that rewrite //looks as much as possible like original query cols.add(columnName); @@ -526,7 +520,7 @@ private String getPredicate() { } StringBuilder sb = new StringBuilder(); for (String col : targetCols) { - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(" AND "); } //but preserve table name in SQL @@ -605,17 +599,15 @@ protected String getMatchedText(ASTNode n) { } protected boolean isAliased(ASTNode n) { - switch (n.getType()) { - case HiveParser.TOK_TABREF: - return findTabRefIdxs(n)[0] != 0; - case HiveParser.TOK_TABNAME: - return false; - case HiveParser.TOK_SUBQUERY: + return switch (n.getType()) { + case HiveParser.TOK_TABREF -> findTabRefIdxs(n)[0] != 0; + case HiveParser.TOK_TABNAME -> false; + case HiveParser.TOK_SUBQUERY -> { assert n.getChildCount() > 1 : "Expected Derived Table to be aliased"; - return true; - default: - throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); - } + yield true; + } + default -> throw raiseWrongType("TOK_TABREF|TOK_TABNAME", n); + }; } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9964b9369065..9edbe5b05e13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -123,15 +123,11 @@ public static ASTNode parse( * @return boolean */ public static boolean isJoinToken(ASTNode node) { - switch (node.getToken().getType()) { - case HiveParser.TOK_JOIN: - case HiveParser.TOK_LEFTOUTERJOIN: - case HiveParser.TOK_RIGHTOUTERJOIN: - case HiveParser.TOK_FULLOUTERJOIN: - return true; - default: - return false; - } + return switch (node.getToken().getType()) { + case HiveParser.TOK_JOIN, HiveParser.TOK_LEFTOUTERJOIN, HiveParser.TOK_RIGHTOUTERJOIN, + HiveParser.TOK_FULLOUTERJOIN -> true; + default -> false; + }; } /** @@ -163,12 +159,10 @@ public static List validateColumnNameUniqueness( // but it should not be a major bottleneck as the number of columns are // anyway not so big Iterator iterCols = fieldSchemas.iterator(); - List colNames = new ArrayList(); + List colNames = new ArrayList<>(); while (iterCols.hasNext()) { String colName = iterCols.next().getName(); - Iterator iter = colNames.iterator(); - while (iter.hasNext()) { - String oldColName = iter.next(); + for (String oldColName : colNames) { if (colName.equalsIgnoreCase(oldColName)) { throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES .getMsg(oldColName)); @@ -286,7 +280,7 @@ public static Pair containsTokenOfType(ASTNode root, Integer .. final Set tokensToMatch = new HashSet<>(Arrays.asList(tokens)); final String[] matched = {null}; - boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate() { + boolean check = ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate<>() { @Override public boolean apply(ASTNode node) { if (tokensToMatch.contains(node.getType())) { @@ -302,7 +296,7 @@ public boolean apply(ASTNode node) { } public static boolean containsTokenOfType(ASTNode root, PTFUtils.Predicate predicate) { - Queue queue = new ArrayDeque(); + Queue queue = new ArrayDeque<>(); // BFS queue.add(root); @@ -535,7 +529,7 @@ public static String getKeywords(Set excludes) { if (excludes != null && excludes.contains(name)) { continue; } - if (sb.length() > 0) { + if (!sb.isEmpty()) { sb.append(","); } sb.append(name); @@ -581,8 +575,7 @@ public static Map> getFullPartitionSpecs( CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); Map colTypes = new HashMap<>(); - List partitionKeys = table.hasNonNativePartitionSupport() ? - table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); + List partitionKeys = table.getPartCols(); for (FieldSchema fs : partitionKeys) { colTypes.put(fs.getName().toLowerCase(), fs.getType()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java index 101f6b1fc3d8..a96f32244f91 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RewriteSemanticAnalyzer.java @@ -43,6 +43,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; /** @@ -117,9 +118,8 @@ protected void analyzeRewrittenTree(ASTNode rewrittenTree, Context rewrittenCtx) */ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) throws SemanticException { String columnName = normalizeColName(colName.getText()); - // Make sure this isn't one of the partitioning columns, that's not supported. - for (FieldSchema fschema : targetTable.getPartCols()) { + for (FieldSchema fschema : targetTable.getPartitionKeys()) { if (fschema.getName().equalsIgnoreCase(columnName)) { throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg()); } @@ -129,7 +129,7 @@ protected void checkValidSetClauseTarget(ASTNode colName, Table targetTable) thr throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE, columnName); } boolean foundColumnInTargetTable = false; - for (FieldSchema col : targetTable.getCols()) { + for (FieldSchema col : targetTable.getStorageSchemaCols()) { if (columnName.equalsIgnoreCase(col.getName())) { foundColumnInTargetTable = true; break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f8ec90287202..47072ee7770a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -5166,7 +5166,7 @@ RowResolver handleInsertStatementSpec(List col_list, String dest, } List newColList = new ArrayList(); colListPos = 0; - List targetTableCols = target != null ? target.getCols() : partition.getCols(); + List targetTableCols = target != null ? target.getStorageSchemaCols() : partition.getCols(); List targetTableColNames = new ArrayList(); List targetTableColTypes = new ArrayList(); for(FieldSchema fs : targetTableCols) { @@ -12040,10 +12040,10 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException { } // Hack!! - refactor once the metadata APIs with types are ready // Finally add the partitioning columns - for (FieldSchema part_col : tab.getPartCols()) { - LOG.trace("Adding partition col: " + part_col); - rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(), - TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)); + for (FieldSchema partCol : tab.getPartitionKeys()) { + LOG.trace("Adding partition col: " + partCol); + rwsch.put(alias, partCol.getName(), new ColumnInfo(partCol.getName(), + TypeInfoFactory.getPrimitiveTypeInfo(partCol.getType()), alias, true)); } // put virtual columns into RowResolver. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java index b72f2496d938..b7335473da85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteMergeRewriter.java @@ -202,7 +202,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(hintStr); hintStr = null; } - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); values.addAll(sqlGenerator.getDeleteValues(Context.Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addValuesForRowLineageForCopyOnMerge(isRowLineageSupported, values, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java index ae57198860c3..187547430e5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/CopyOnWriteUpdateRewriter.java @@ -21,7 +21,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.RowLineageUtils; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -90,7 +92,7 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB } Map setColExprs = new HashMap<>(updateBlock.getSetCols().size()); - List nonPartCols = updateBlock.getTargetTable().getCols(); + List nonPartCols = updateBlock.getTargetTable().getStorageSchemaCols(); for (int i = 0; i < nonPartCols.size(); i++) { if (columnOffset > 0 || i > 0) { sqlGenerator.append(','); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java index 3ec2e580f046..ba6640c6868b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/MergeRewriter.java @@ -224,7 +224,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau sqlGenerator.append(" -- update clause").append("\n"); List valuesAndAcidSortKeys = new ArrayList<>( - targetTable.getCols().size() + targetTable.getPartCols().size() + 1); + targetTable.getAllCols().size() + 1); valuesAndAcidSortKeys.addAll(sqlGenerator.getSortKeys(Operation.MERGE)); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), valuesAndAcidSortKeys); sqlGenerator.appendInsertBranch(hintStr, valuesAndAcidSortKeys); @@ -238,10 +238,9 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau protected void addValues(Table targetTable, String targetAlias, Map newValues, List values) { - UnaryOperator formatter = name -> String.format("%s.%s", targetAlias, + UnaryOperator formatter = name -> String.format("%s.%s", targetAlias, HiveUtils.unparseIdentifier(name, conf)); - - for (FieldSchema fieldSchema : targetTable.getCols()) { + for (FieldSchema fieldSchema : targetTable.getStorageSchemaCols()) { if (newValues.containsKey(fieldSchema.getName())) { String rhsExp = newValues.get(fieldSchema.getName()); values.add(getRhsExpValue(rhsExp, formatter.apply(fieldSchema.getName()))); @@ -249,8 +248,8 @@ protected void addValues(Table targetTable, String targetAlias, Map values.add( + + targetTable.getPartitionKeys().forEach(fieldSchema -> values.add( formatter.apply(fieldSchema.getName()))); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java index 84fcf186f6b7..06edaca90f0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitMergeRewriter.java @@ -58,7 +58,7 @@ public void appendWhenMatchedUpdateClause(MergeStatement.UpdateClause updateClau String onClauseAsString = mergeStatement.getOnClauseAsText(); sqlGenerator.append(" -- update clause (insert part)\n"); - List values = new ArrayList<>(targetTable.getCols().size() + targetTable.getPartCols().size()); + List values = new ArrayList<>(targetTable.getAllCols().size()); addValues(targetTable, targetAlias, updateClause.getNewValuesMap(), values); addRowLineageColumnsForWhenMatchedUpdateClause(isRowLineageSupported, values, targetAlias, conf); sqlGenerator.appendInsertBranch(hintStr, values); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java index d14ddc7eb485..d5517b8fcfff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/SplitUpdateRewriter.java @@ -20,7 +20,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -65,10 +67,10 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB List deleteValues = sqlGenerator.getDeleteValues(OPERATION); int columnOffset = deleteValues.size(); - List insertValues = new ArrayList<>(updateBlock.getTargetTable().getCols().size()); + List insertValues = new ArrayList<>(updateBlock.getTargetTable().getStorageSchemaCols().size()); boolean first = true; - List nonPartCols = updateBlock.getTargetTable().getCols(); + List nonPartCols = updateBlock.getTargetTable().getStorageSchemaCols(); for (int i = 0; i < nonPartCols.size(); i++) { if (first) { first = false; @@ -98,10 +100,8 @@ public ParseUtils.ReparseResult rewrite(Context context, UpdateStatement updateB insertValues.add(sqlGenerator.qualify(identifier)); } - if (updateBlock.getTargetTable().getPartCols() != null) { - updateBlock.getTargetTable().getPartCols().forEach( - fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); - } + updateBlock.getTargetTable().getPartitionKeys().forEach( + fieldSchema -> insertValues.add(sqlGenerator.qualify(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf)))); addRowLineageColumnsForUpdate(updateBlock.getTargetTable(), sqlGenerator, insertValues, conf); sqlGenerator.append(" FROM ").append(sqlGenerator.getTargetTableFullName()).append(") "); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java index 7587daf13055..c7b187c76dd3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/rewrite/sql/MultiInsertSqlGenerator.java @@ -148,7 +148,7 @@ public void removeLastChar() { } public void appendPartColsOfTargetTableWithComma(String alias) { - if (targetTable.getPartCols() == null || targetTable.getPartCols().isEmpty()) { + if (targetTable.getPartitionKeys() == null || targetTable.getPartitionKeys().isEmpty()) { return; } queryStr.append(','); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 0dcfe72d7f5b..b5b4662a1491 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -57,7 +57,7 @@ public class PartitionDesc implements Serializable, Cloneable { private static final Interner> CLASS_INTERNER = Interners.newWeakInterner(); private TableDesc tableDesc; - private LinkedHashMap partSpec; + private Map partSpec; private Class inputFileFormatClass; private Class outputFileFormatClass; private Properties properties; @@ -73,7 +73,7 @@ public void setBaseFileName(String baseFileName) { public PartitionDesc() { } - public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { + public PartitionDesc(final TableDesc table, final Map partSpec) { this.tableDesc = table; setPartSpec(partSpec); } @@ -138,11 +138,11 @@ public void setTableDesc(TableDesc tableDesc) { } @Explain(displayName = "partition values", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) - public LinkedHashMap getPartSpec() { + public Map getPartSpec() { return partSpec; } - public void setPartSpec(final LinkedHashMap partSpec) { + public void setPartSpec(final Map partSpec) { StringInternUtils.internValuesInMap(partSpec); this.partSpec = partSpec; } diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java index 0f5d7b915168..f81f8e9ec816 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java @@ -82,7 +82,7 @@ public static final CacheTag build(String tableName) { return new TableCacheTag(tableName); } - public static final CacheTag build(String tableName, LinkedHashMap partDescMap) { + public static final CacheTag build(String tableName, Map partDescMap) { if (StringUtils.isEmpty(tableName) || partDescMap == null || partDescMap.isEmpty()) { throw new IllegalArgumentException(); }