This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5b2360e836 [opt](planner) speed up computeColumnsFilter on ScanNode 
(#23742)
5b2360e836 is described below

commit 5b2360e836fb3c6b953801f3296a0998b079864e
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Fri Sep 1 14:22:17 2023 +0800

    [opt](planner) speed up computeColumnsFilter on ScanNode (#23742)
    
    computeColumnsFilter computes filters for every column in the table's
    base schema. However, if the table is very wide (e.g. 5000 columns),
    this takes a long time. This PR compares the number of conjuncts with
    the number of columns. If there are fewer conjuncts than columns, it
    collects the slots referenced by the conjuncts instead, to avoid
    traversing all columns.
---
 .../planner/BackendPartitionedSchemaScanNode.java  |  2 +-
 .../org/apache/doris/planner/OlapScanNode.java     |  4 +-
 .../java/org/apache/doris/planner/ScanNode.java    | 56 +++++++++++++++-------
 .../doris/planner/external/ExternalScanNode.java   |  4 +-
 .../doris/planner/external/FileQueryScanNode.java  |  2 +-
 .../doris/planner/external/hudi/HudiScanNode.java  |  2 +-
 .../planner/external/paimon/PaimonScanNode.java    |  2 +-
 7 files changed, 47 insertions(+), 25 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java
index 6b2e278a01..53f48c60e8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java
@@ -78,7 +78,7 @@ public class BackendPartitionedSchemaScanNode extends 
SchemaScanNode {
     @Override
     public void init(Analyzer analyzer) throws UserException {
         super.init(analyzer);
-        computeColumnFilter();
+        computeColumnsFilter();
         computePartitionInfo();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 7fdf7eda2d..806000c637 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -536,7 +536,7 @@ public class OlapScanNode extends ScanNode {
         // lazy evaluation, since stmt is a prepared statment
         isFromPrepareStmt = analyzer.getPrepareStmt() != null;
         if (!isFromPrepareStmt) {
-            computeColumnFilter();
+            computeColumnsFilter();
             computePartitionInfo();
         }
         computeTupleState(analyzer);
@@ -1116,7 +1116,7 @@ public class OlapScanNode extends ScanNode {
         // Lazy evaluation
         selectedIndexId = olapTable.getBaseIndexId();
         // Only key columns
-        computeColumnFilter(olapTable.getBaseSchemaKeyColumns());
+        computeColumnsFilter(olapTable.getBaseSchemaKeyColumns());
         computePartitionInfo();
         scanBackendIds.clear();
         scanTabletIds.clear();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
index 84b95c5a06..ef42ae1e4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -154,32 +154,54 @@ public abstract class ScanNode extends PlanNode {
             Set<SlotId> requiredByProjectSlotIdSet) throws UserException {
     }
 
+    private void computeColumnFilter(Column column, SlotDescriptor slotDesc) {
+        // Set `columnFilters` all the time because `DistributionPruner` also 
use this.
+        // Maybe we could use `columnNameToRange` for `DistributionPruner` and
+        // only create `columnFilters` when 
`partition_prune_algorithm_version` is 1.
+        PartitionColumnFilter keyFilter = createPartitionFilter(slotDesc, 
conjuncts);
+        if (null != keyFilter) {
+            columnFilters.put(column.getName(), keyFilter);
+        }
+
+        ColumnRange columnRange = createColumnRange(slotDesc, conjuncts);
+        if (columnRange != null) {
+            columnNameToRange.put(column.getName(), columnRange);
+        }
+    }
+
     // TODO(ML): move it into PrunerOptimizer
-    public void computeColumnFilter(List<Column> columns) {
-        for (Column column : columns) {
-            SlotDescriptor slotDesc = desc.getColumnSlot(column.getName());
-            if (null == slotDesc) {
-                continue;
+    public void computeColumnsFilter(List<Column> columns) {
+        if (columns.size() > conjuncts.size()) {
+            Set<SlotRef> slotRefs = Sets.newHashSet();
+            for (Expr conjunct : conjuncts) {
+                conjunct.collect(SlotRef.class, slotRefs);
             }
-            // Set `columnFilters` all the time because `DistributionPruner` 
also use this.
-            // Maybe we could use `columnNameToRange` for `DistributionPruner` 
and
-            // only create `columnFilters` when 
`partition_prune_algorithm_version` is 1.
-            PartitionColumnFilter keyFilter = createPartitionFilter(slotDesc, 
conjuncts);
-            if (null != keyFilter) {
-                columnFilters.put(column.getName(), keyFilter);
+            for (SlotRef slotRef : slotRefs) {
+                SlotDescriptor slotDesc = slotRef.getDesc();
+                if (null == slotDesc) {
+                    continue;
+                }
+                Column column = slotDesc.getColumn();
+                if (column == null) {
+                    continue;
+                }
+                computeColumnFilter(column, slotDesc);
             }
-
-            ColumnRange columnRange = createColumnRange(slotDesc, conjuncts);
-            if (columnRange != null) {
-                columnNameToRange.put(column.getName(), columnRange);
+        } else {
+            for (Column column : columns) {
+                SlotDescriptor slotDesc = desc.getColumnSlot(column.getName());
+                if (null == slotDesc) {
+                    continue;
+                }
+                computeColumnFilter(column, slotDesc);
             }
         }
     }
 
-    public void computeColumnFilter() {
+    public void computeColumnsFilter() {
         // for load scan node, table is null
         if (desc.getTable() != null) {
-            computeColumnFilter(desc.getTable().getBaseSchema());
+            computeColumnsFilter(desc.getTable().getBaseSchema());
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java
index 2fda6fbd28..a4751b5205 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java
@@ -55,14 +55,14 @@ public abstract class ExternalScanNode extends ScanNode {
     public void init(Analyzer analyzer) throws UserException {
         super.init(analyzer);
         computeStats(analyzer);
-        computeColumnFilter();
+        computeColumnsFilter();
         initBackendPolicy();
     }
 
     // For Nereids
     @Override
     public void init() throws UserException {
-        computeColumnFilter();
+        computeColumnsFilter();
         initBackendPolicy();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
index e654a64c70..312a09d88e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@@ -139,7 +139,7 @@ public abstract class FileQueryScanNode extends 
FileScanNode {
                                 table.getName()));
             }
         }
-        computeColumnFilter();
+        computeColumnsFilter();
         initBackendPolicy();
         initSchemaParams();
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java
index 20a3a1cf6d..c92c46659e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java
@@ -117,7 +117,7 @@ public class HudiScanNode extends HiveScanNode {
                     String.format("Querying external view '%s.%s' is not 
supported", table.getDbName(),
                             table.getName()));
         }
-        computeColumnFilter();
+        computeColumnsFilter();
         initBackendPolicy();
         initSchemaParams();
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
index a68d5edcb2..aedc62869c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java
@@ -69,7 +69,7 @@ public class PaimonScanNode extends FileQueryScanNode {
                     String.format("Querying external view '%s.%s' is not 
supported", table.getDbName(),
                             table.getName()));
         }
-        computeColumnFilter();
+        computeColumnsFilter();
         initBackendPolicy();
         source = new PaimonSource((PaimonExternalTable) table, desc, 
columnNameToRange);
         Preconditions.checkNotNull(source);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to