This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 94ed30d9fae51ffb6c448e70b3076724c0eb0a8e Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Wed Apr 10 18:12:16 2024 +0200 IMPALA-12991: Eliminate unnecessary SORT for Iceberg DELETEs Since we are using IcebergBufferedDeleteSink, which sorts the data before flushing, there is no need to add a SORT node before the sink. Testing: * updated planner tests Change-Id: I94a691e7990228a1ec2de03e6ad90ebb97931581 Reviewed-on: http://gerrit.cloudera.org:8080/21285 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/planner/Planner.java | 8 ++++- .../queries/PlannerTest/iceberg-v2-delete.test | 40 ---------------------- 2 files changed, 7 insertions(+), 41 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java index f3c0b5bfc..0bd71a40a 100644 --- a/fe/src/main/java/org/apache/impala/planner/Planner.java +++ b/fe/src/main/java/org/apache/impala/planner/Planner.java @@ -29,6 +29,7 @@ import org.apache.impala.analysis.AnalysisContext.AnalysisResult; import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.ColumnLineageGraph; import org.apache.impala.analysis.ColumnLineageGraph.ColumnLabel; +import org.apache.impala.analysis.DeleteStmt; import org.apache.impala.analysis.DmlStatementBase; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.ExprSubstitutionMap; @@ -279,7 +280,12 @@ public class Planner { rootFragment = distributedPlanner.createDmlFragment( rootFragment, stmt, ctx_.getRootAnalyzer(), fragments); } - createPreDmlSort(stmt, rootFragment, ctx_.getRootAnalyzer()); + // We don't need to add a SORT node for DELETE operations as we are using the + // IcebergBufferedDeleteSink. UPDATE/MERGE statements will still require to + // sort their data records. + if (!(stmt instanceof DeleteStmt)) { + createPreDmlSort(stmt, rootFragment, ctx_.getRootAnalyzer()); + } return rootFragment; } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test index a0b8aceed..8e42147da 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test @@ -183,10 +183,6 @@ delete from iceberg_v2_partitioned_position_deletes where id = 20; ---- PLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -03:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] | row-size=40B cardinality=1 | @@ -203,10 +199,6 @@ BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position ---- DISTRIBUTEDPLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -05:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)] | 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] @@ -229,10 +221,6 @@ delete from iceberg_v2_partitioned_position_deletes where action = 'click'; ---- PLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -03:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=3 -| 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] | row-size=36B cardinality=3 | @@ -249,10 +237,6 @@ BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position ---- DISTRIBUTEDPLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -04:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=3 -| 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] | row-size=36B cardinality=3 | @@ -273,10 +257,6 @@ delete from iceberg_v2_partitioned_position_deletes where user like 'A%'; ---- PLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -03:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] | row-size=48B cardinality=1 | @@ -293,10 +273,6 @@ BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position ---- DISTRIBUTEDPLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -05:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)] | 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] @@ -320,10 +296,6 @@ where id = (select max(id) from iceberg_v2_delete_positional); ---- PLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -08:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=10 -| 07:HASH JOIN [LEFT SEMI JOIN] | hash predicates: id = max(id) | runtime filters: RF000 <- max(id) @@ -362,10 +334,6 @@ BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position ---- DISTRIBUTEDPLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -14:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=10 -| 13:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)] | 07:HASH JOIN [LEFT SEMI JOIN, BROADCAST] @@ -420,10 +388,6 @@ DELETE FROM iceberg_v2_partitioned_position_deletes WHERE FILE__POSITION = id ---- PLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -03:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] | row-size=40B cardinality=1 | @@ -440,10 +404,6 @@ BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position ---- DISTRIBUTEDPLAN BUFFERED DELETE FROM ICEBERG [functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE] | -05:SORT -| order by: partition__spec__id ASC NULLS LAST, iceberg__partition__serialized ASC NULLS LAST -| row-size=36B cardinality=1 -| 04:EXCHANGE [HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)] | 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED]
