This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 94ed30d9fae51ffb6c448e70b3076724c0eb0a8e
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Wed Apr 10 18:12:16 2024 +0200

    IMPALA-12991: Eliminate unnecessary SORT for Iceberg DELETEs
    
    Since we are using IcebergBufferedDeleteSink, which sorts the data
    before flushing, there is no need to add a SORT node before the sink.
    
    Testing:
     * updated planner tests
    
    Change-Id: I94a691e7990228a1ec2de03e6ad90ebb97931581
    Reviewed-on: http://gerrit.cloudera.org:8080/21285
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/planner/Planner.java    |  8 ++++-
 .../queries/PlannerTest/iceberg-v2-delete.test     | 40 ----------------------
 2 files changed, 7 insertions(+), 41 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index f3c0b5bfc..0bd71a40a 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -29,6 +29,7 @@ import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.ColumnLineageGraph;
 import org.apache.impala.analysis.ColumnLineageGraph.ColumnLabel;
+import org.apache.impala.analysis.DeleteStmt;
 import org.apache.impala.analysis.DmlStatementBase;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.ExprSubstitutionMap;
@@ -279,7 +280,12 @@ public class Planner {
       rootFragment = distributedPlanner.createDmlFragment(
           rootFragment, stmt, ctx_.getRootAnalyzer(), fragments);
     }
-    createPreDmlSort(stmt, rootFragment, ctx_.getRootAnalyzer());
+    // We don't need to add a SORT node for DELETE operations as we are using the
+    // IcebergBufferedDeleteSink. UPDATE/MERGE statements will still require to
+    // sort their data records.
+    if (!(stmt instanceof DeleteStmt)) {
+      createPreDmlSort(stmt, rootFragment, ctx_.getRootAnalyzer());
+    }
     return rootFragment;
   }
 
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
index a0b8aceed..8e42147da 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-delete.test
@@ -183,10 +183,6 @@ delete from iceberg_v2_partitioned_position_deletes where id = 20;
 ---- PLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-03:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
 |  row-size=40B cardinality=1
 |
@@ -203,10 +199,6 @@ BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position
 ---- DISTRIBUTEDPLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-05:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 04:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED]
@@ -229,10 +221,6 @@ delete from iceberg_v2_partitioned_position_deletes where 
action = 'click';
 ---- PLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-03:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=3
-|
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
 |  row-size=36B cardinality=3
 |
@@ -249,10 +237,6 @@ BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position
 ---- DISTRIBUTEDPLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-04:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=3
-|
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED]
 |  row-size=36B cardinality=3
 |
@@ -273,10 +257,6 @@ delete from iceberg_v2_partitioned_position_deletes where 
user like 'A%';
 ---- PLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-03:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
 |  row-size=48B cardinality=1
 |
@@ -293,10 +273,6 @@ BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position
 ---- DISTRIBUTEDPLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-05:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 04:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED]
@@ -320,10 +296,6 @@ where id = (select max(id) from 
iceberg_v2_delete_positional);
 ---- PLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-08:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=10
-|
 07:HASH JOIN [LEFT SEMI JOIN]
 |  hash predicates: id = max(id)
 |  runtime filters: RF000 <- max(id)
@@ -362,10 +334,6 @@ BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position
 ---- DISTRIBUTEDPLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-14:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=10
-|
 13:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 07:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
@@ -420,10 +388,6 @@ DELETE FROM iceberg_v2_partitioned_position_deletes WHERE 
FILE__POSITION = id
 ---- PLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-03:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN]
 |  row-size=40B cardinality=1
 |
@@ -440,10 +404,6 @@ BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position
 ---- DISTRIBUTEDPLAN
 BUFFERED DELETE FROM ICEBERG 
[functional_parquet.iceberg_v2_partitioned_position_deletes-POSITION-DELETE]
 |
-05:SORT
-|  order by: partition__spec__id ASC NULLS LAST, 
iceberg__partition__serialized ASC NULLS LAST
-|  row-size=36B cardinality=1
-|
 04:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_partitioned_position_deletes.PARTITION__SPEC__ID,functional_parquet.iceberg_v2_partitioned_position_deletes.ICEBERG__PARTITION__SERIALIZED)]
 |
 02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED]

Reply via email to