This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8b6f9273ce012877747c8e06aada289686be5de4 Author: LPL <[email protected]> AuthorDate: Thu Apr 27 23:17:58 2023 +0800 IMPALA-12097: WITH CLAUSE should be skipped when optimizing COUNT(*) query on Iceberg table When optimizing the simple count star query for the Iceberg table, the WITH CLAUSE should be skipped, but that doesn't mean the SQL can't be optimized, because when the WITH CLAUSE is inlined, the final statement is optimized by the CountStarToConstRule. Testing: * Add e2e tests Change-Id: I7b21cbea79be77f2ea8490bd7f7b2f62063eb0e4 Reviewed-on: http://gerrit.cloudera.org:8080/19811 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../org/apache/impala/analysis/SelectStmt.java | 10 +- .../queries/PlannerTest/iceberg-v2-tables.test | 112 +++++++++++++++++++++ .../iceberg-plain-count-star-optimization.test | 24 ++++- .../iceberg-v2-plain-count-star-optimization.test | 30 ++++++ 4 files changed, 173 insertions(+), 3 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 1b530473b..25a79c189 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -272,7 +272,7 @@ public class SelectStmt extends QueryStmt { if (isAnalyzed()) return; super.analyze(analyzer); new SelectAnalyzer(analyzer).analyze(); - this.optimizePlainCountStarQuery(); + this.optimizePlainCountStarQueryForIcebergTable(); } /** @@ -1430,10 +1430,16 @@ public class SelectStmt extends QueryStmt { * - table is the Iceberg table * - SelectList must contains 'count(*)' or 'count(constant)' * - SelectList can contain constant + * - stmt does not have WITH clause * - only for V1: SelectList can contain other agg functions, e.g. min, sum, etc * e.g. 'SELECT count(*) FROM iceberg_tbl' would be rewritten as 'SELECT constant'. */ - public void optimizePlainCountStarQuery() throws AnalysisException { + public void optimizePlainCountStarQueryForIcebergTable() throws AnalysisException { + // When optimizing the simple count star query for the Iceberg table, the WITH CLAUSE + // should be skipped, but that doesn't mean the SQL can't be optimized, because when + // the WITH CLAUSE is inlined, the final Stmt is optimized by CountStarToConstRule. + if (this.analyzer_.hasWithClause()) return; + if (this.hasWhereClause()) return; if (this.hasGroupByClause()) return; if (this.hasHavingClause()) return; diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test index 30c45b779..66a4f8954 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test @@ -1040,3 +1040,115 @@ PLAN-ROOT SINK skipped Iceberg predicates: action = 'download' row-size=64B cardinality=6 ==== +with u1 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files), +u2 as (select -1 as c), +u3 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files) select * from u1, u2, u3; +---- PLAN +PLAN-ROOT SINK +| +10:NESTED LOOP JOIN [CROSS JOIN] +| row-size=17B cardinality=1 +| +|--08:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | row-size=20B cardinality=6 +| | +| |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | row-size=267B cardinality=4 +| | +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB +| row-size=20B cardinality=6 +| +09:NESTED LOOP JOIN [CROSS JOIN] +| row-size=9B cardinality=1 +| +|--04:UNION +| constant-operands=1 +| row-size=1B cardinality=1 +| +03:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| row-size=20B cardinality=6 +| +|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB +| row-size=267B cardinality=4 +| +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=2 size=1.22KB + row-size=20B cardinality=6 +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=17B cardinality=1 +| +|--20:EXCHANGE [UNPARTITIONED] +| | +| 19:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 18:EXCHANGE [UNPARTITIONED] +| | +| 08:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=20B cardinality=6 +| | +| |--17:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| | | +| | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | row-size=267B cardinality=4 +| | +| 16:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | +| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB +| row-size=20B cardinality=6 +| +09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=9B cardinality=1 +| +|--15:EXCHANGE [UNPARTITIONED] +| | +| 04:UNION +| constant-operands=1 +| row-size=1B cardinality=1 +| +14:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +13:EXCHANGE [UNPARTITIONED] +| +03:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| row-size=20B cardinality=6 +| +|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| | +| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| HDFS partitions=1/1 files=2 size=5.33KB +| row-size=267B cardinality=4 +| +11:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| +00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=2 size=1.22KB + row-size=20B cardinality=6 +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test index daeb1e440..da56563eb 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test @@ -224,7 +224,7 @@ aggregation(SUM, NumRowGroups): 0 aggregation(SUM, NumFileMetadataRead): 3 ==== ---- QUERY -select count(*) as c from ice_tbl_u1 union all (select count(*) c from ice_tbl_u2) order by c; +select count(*) as c from ice_tbl_u1 union all (select count(*) c from ice_tbl_u2) order by c; ---- RESULTS 3 6 @@ -234,3 +234,25 @@ BIGINT aggregation(SUM, NumRowGroups): 0 aggregation(SUM, NumFileMetadataRead): 0 ==== +---- QUERY +with u1 as (select count(*) from ice_tbl_u1), u2 as (select count(*) from ice_tbl_u2) select * from u1, u2; +---- RESULTS +3,6 +---- TYPES +BIGINT,BIGINT +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 0 +aggregation(SUM, NumFileMetadataRead): 0 +==== +---- QUERY +with u1 as (select count(*) from ice_tbl_u1), +u2 as (select count(*) from ice_tbl_u1 union all (select count(*) from ice_tbl_u2)) select * from u1, u2 order by 1,2; +---- RESULTS +3,3 +3,6 +---- TYPES +BIGINT,BIGINT +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 0 +aggregation(SUM, NumFileMetadataRead): 0 +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test index c84ee868e..21ac4a1fe 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test @@ -23,6 +23,21 @@ aggregation(SUM, NumOrcStripes): 4 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY +with u1 as (select count(*) as c from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files), +u2 as (select count(*) c from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files_orc), +u3 as (select -1 as c), +u4 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes), +u5 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes_orc) select * from u1, u2, u3, u4, u5; +---- RESULTS +6,6,-1,3,3 +---- TYPES +BIGINT,BIGINT,TINYINT,BIGINT,BIGINT +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 4 +aggregation(SUM, NumOrcStripes): 4 +aggregation(SUM, NumFileMetadataRead): 0 +==== +---- QUERY select count(*) as c from iceberg_v2_positional_not_all_data_files_have_delete_files for system_version as of 752781918366351945 union all (select count(*) as c from iceberg_v2_positional_not_all_data_files_have_delete_files_orc for system_version as of 5003445199566617082) @@ -45,3 +60,18 @@ aggregation(SUM, NumRowGroups): 2 aggregation(SUM, NumOrcStripes): 2 aggregation(SUM, NumFileMetadataRead): 0 ==== +---- QUERY +with u1 as (select count(*) as c from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files for system_version as of 752781918366351945), +u2 as (select count(*) c from functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files_orc for system_version as of 5003445199566617082), +u3 as (select -1 as c), +u4 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes), +u5 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes_orc) select * from u1, u2, u3, u4, u5; +---- RESULTS +9,9,-1,3,3 +---- TYPES +BIGINT,BIGINT,TINYINT,BIGINT,BIGINT +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 2 +aggregation(SUM, NumOrcStripes): 2 +aggregation(SUM, NumFileMetadataRead): 0 +==== \ No newline at end of file
