This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5b2360e836 [opt](planner) speed up computeColumnsFilter on ScanNode (#23742) 5b2360e836 is described below commit 5b2360e836fb3c6b953801f3296a0998b079864e Author: morrySnow <101034200+morrys...@users.noreply.github.com> AuthorDate: Fri Sep 1 14:22:17 2023 +0800 [opt](planner) speed up computeColumnsFilter on ScanNode (#23742) computeColumnsFilter computes filters on all columns of the table's base schema. However, if the table is very wide, such as 5000 columns, it will take a long time. This PR compares the conjuncts size and the columns size. If the conjuncts size is smaller than the columns size, then it collects slots from the conjuncts to avoid traversing all columns. --- .../planner/BackendPartitionedSchemaScanNode.java | 2 +- .../org/apache/doris/planner/OlapScanNode.java | 4 +- .../java/org/apache/doris/planner/ScanNode.java | 56 +++++++++++++++------- .../doris/planner/external/ExternalScanNode.java | 4 +- .../doris/planner/external/FileQueryScanNode.java | 2 +- .../doris/planner/external/hudi/HudiScanNode.java | 2 +- .../planner/external/paimon/PaimonScanNode.java | 2 +- 7 files changed, 47 insertions(+), 25 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java index 6b2e278a01..53f48c60e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java @@ -78,7 +78,7 @@ public class BackendPartitionedSchemaScanNode extends SchemaScanNode { @Override public void init(Analyzer analyzer) throws UserException { super.init(analyzer); - computeColumnFilter(); + computeColumnsFilter(); computePartitionInfo(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 7fdf7eda2d..806000c637 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -536,7 +536,7 @@ public class OlapScanNode extends ScanNode { // lazy evaluation, since stmt is a prepared statment isFromPrepareStmt = analyzer.getPrepareStmt() != null; if (!isFromPrepareStmt) { - computeColumnFilter(); + computeColumnsFilter(); computePartitionInfo(); } computeTupleState(analyzer); @@ -1116,7 +1116,7 @@ public class OlapScanNode extends ScanNode { // Lazy evaluation selectedIndexId = olapTable.getBaseIndexId(); // Only key columns - computeColumnFilter(olapTable.getBaseSchemaKeyColumns()); + computeColumnsFilter(olapTable.getBaseSchemaKeyColumns()); computePartitionInfo(); scanBackendIds.clear(); scanTabletIds.clear(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index 84b95c5a06..ef42ae1e4e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -154,32 +154,54 @@ public abstract class ScanNode extends PlanNode { Set<SlotId> requiredByProjectSlotIdSet) throws UserException { } + private void computeColumnFilter(Column column, SlotDescriptor slotDesc) { + // Set `columnFilters` all the time because `DistributionPruner` also use this. + // Maybe we could use `columnNameToRange` for `DistributionPruner` and + // only create `columnFilters` when `partition_prune_algorithm_version` is 1. 
+ PartitionColumnFilter keyFilter = createPartitionFilter(slotDesc, conjuncts); + if (null != keyFilter) { + columnFilters.put(column.getName(), keyFilter); + } + + ColumnRange columnRange = createColumnRange(slotDesc, conjuncts); + if (columnRange != null) { + columnNameToRange.put(column.getName(), columnRange); + } + } + // TODO(ML): move it into PrunerOptimizer - public void computeColumnFilter(List<Column> columns) { - for (Column column : columns) { - SlotDescriptor slotDesc = desc.getColumnSlot(column.getName()); - if (null == slotDesc) { - continue; + public void computeColumnsFilter(List<Column> columns) { + if (columns.size() > conjuncts.size()) { + Set<SlotRef> slotRefs = Sets.newHashSet(); + for (Expr conjunct : conjuncts) { + conjunct.collect(SlotRef.class, slotRefs); } - // Set `columnFilters` all the time because `DistributionPruner` also use this. - // Maybe we could use `columnNameToRange` for `DistributionPruner` and - // only create `columnFilters` when `partition_prune_algorithm_version` is 1. 
- PartitionColumnFilter keyFilter = createPartitionFilter(slotDesc, conjuncts); - if (null != keyFilter) { - columnFilters.put(column.getName(), keyFilter); + for (SlotRef slotRef : slotRefs) { + SlotDescriptor slotDesc = slotRef.getDesc(); + if (null == slotDesc) { + continue; + } + Column column = slotDesc.getColumn(); + if (column == null) { + continue; + } + computeColumnFilter(column, slotDesc); } - - ColumnRange columnRange = createColumnRange(slotDesc, conjuncts); - if (columnRange != null) { - columnNameToRange.put(column.getName(), columnRange); + } else { + for (Column column : columns) { + SlotDescriptor slotDesc = desc.getColumnSlot(column.getName()); + if (null == slotDesc) { + continue; + } + computeColumnFilter(column, slotDesc); } } } - public void computeColumnFilter() { + public void computeColumnsFilter() { // for load scan node, table is null if (desc.getTable() != null) { - computeColumnFilter(desc.getTable().getBaseSchema()); + computeColumnsFilter(desc.getTable().getBaseSchema()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java index 2fda6fbd28..a4751b5205 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/ExternalScanNode.java @@ -55,14 +55,14 @@ public abstract class ExternalScanNode extends ScanNode { public void init(Analyzer analyzer) throws UserException { super.init(analyzer); computeStats(analyzer); - computeColumnFilter(); + computeColumnsFilter(); initBackendPolicy(); } // For Nereids @Override public void init() throws UserException { - computeColumnFilter(); + computeColumnsFilter(); initBackendPolicy(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java index 
e654a64c70..312a09d88e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java @@ -139,7 +139,7 @@ public abstract class FileQueryScanNode extends FileScanNode { table.getName())); } } - computeColumnFilter(); + computeColumnsFilter(); initBackendPolicy(); initSchemaParams(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java index 20a3a1cf6d..c92c46659e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java @@ -117,7 +117,7 @@ public class HudiScanNode extends HiveScanNode { String.format("Querying external view '%s.%s' is not supported", table.getDbName(), table.getName())); } - computeColumnFilter(); + computeColumnsFilter(); initBackendPolicy(); initSchemaParams(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java index a68d5edcb2..aedc62869c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java @@ -69,7 +69,7 @@ public class PaimonScanNode extends FileQueryScanNode { String.format("Querying external view '%s.%s' is not supported", table.getDbName(), table.getName())); } - computeColumnFilter(); + computeColumnsFilter(); initBackendPolicy(); source = new PaimonSource((PaimonExternalTable) table, desc, columnNameToRange); Preconditions.checkNotNull(source); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, 
e-mail: commits-h...@doris.apache.org