This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch nested_column_prune in repository https://gitbox.apache.org/repos/asf/doris.git
commit 2c9392e15ae4f47f733bda68b35c5a437a19da4f Author: 924060929 <lanhuaj...@selectdb.com> AuthorDate: Fri Oct 10 22:25:26 2025 +0800 refactor interface and support fe part --- .../org/apache/doris/analysis/AccessPathInfo.java | 36 ++ .../org/apache/doris/analysis/SlotDescriptor.java | 25 + .../org/apache/doris/nereids/StatementContext.java | 21 + .../glue/translator/PhysicalPlanTranslator.java | 24 +- .../glue/translator/PlanTranslatorContext.java | 11 + .../doris/nereids/jobs/executor/Rewriter.java | 6 + .../org/apache/doris/nereids/rules/RuleType.java | 1 + .../rules/rewrite/NestedColumnCollector.java | 520 +++++++++++++++++++++ .../expressions/visitor/ScalarFunctionVisitor.java | 4 +- .../org/apache/doris/nereids/types/ArrayType.java | 2 +- .../org/apache/doris/nereids/types/MapType.java | 2 +- .../doris/nereids/types/NestedColumnPrunable.java | 22 + .../org/apache/doris/nereids/types/StructType.java | 6 +- .../apache/doris/nereids/types/VariantType.java | 2 +- .../nereids/rules/rewrite/PruneNestedColumn.java | 256 ++++++++++ .../apache/doris/utframe/TestWithFeService.java | 4 +- gensrc/thrift/Descriptors.thrift | 13 +- 17 files changed, 937 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AccessPathInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AccessPathInfo.java new file mode 100644 index 00000000000..24501a469d7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AccessPathInfo.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/TupleDescriptor.java +// and modified by Doris + +package org.apache.doris.analysis; + +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.thrift.TColumnAccessPaths; + +import lombok.AllArgsConstructor; +import lombok.Data; + +/** AccessPathInfo */ +@Data +@AllArgsConstructor +public class AccessPathInfo { + private DataType prunedType; + private TColumnAccessPaths allAccessPaths; + private TColumnAccessPaths predicateAccessPaths; +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index 58e1073e879..3c6a2c851a1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.ColumnStats; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Type; +import org.apache.doris.thrift.TColumnAccessPaths; import org.apache.doris.thrift.TSlotDescriptor; import com.google.common.base.MoreObjects; @@ -75,6 +76,8 @@ public class SlotDescriptor { private boolean needMaterialize = true; private boolean isAutoInc = false; private Expr virtualColumn = null; + private TColumnAccessPaths allAccessPaths; + private TColumnAccessPaths predicateAccessPaths; public SlotDescriptor(SlotId id, TupleDescriptor parent) { @@ 
-129,6 +132,22 @@ public class SlotDescriptor { return this.subColPath; } + public TColumnAccessPaths getAllAccessPaths() { + return allAccessPaths; + } + + public void setAllAccessPaths(TColumnAccessPaths allAccessPaths) { + this.allAccessPaths = allAccessPaths; + } + + public TColumnAccessPaths getPredicateAccessPaths() { + return predicateAccessPaths; + } + + public void setPredicateAccessPaths(TColumnAccessPaths predicateAccessPaths) { + this.predicateAccessPaths = predicateAccessPaths; + } + public TupleDescriptor getParent() { return parent; } @@ -335,6 +354,12 @@ public class SlotDescriptor { if (virtualColumn != null) { tSlotDescriptor.setVirtualColumnExpr(virtualColumn.treeToThrift()); } + if (allAccessPaths != null) { + tSlotDescriptor.setAllAccessPaths(allAccessPaths); + } + if (predicateAccessPaths != null) { + tSlotDescriptor.setPredicateAccessPaths(predicateAccessPaths); + } return tSlotDescriptor; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 746efa3cf95..e8d4b28ea83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids; +import org.apache.doris.analysis.AccessPathInfo; import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.TableScanParams; import org.apache.doris.analysis.TableSnapshot; @@ -275,6 +276,10 @@ public class StatementContext implements Closeable { private boolean isInsert = false; + private boolean hasNestedColumns; + + private Map<Integer, AccessPathInfo> slotIdToAcessPathInfo = new HashMap<>(); + public StatementContext() { this(ConnectContext.get(), null, 0); } @@ -994,4 +999,20 @@ public class StatementContext implements Closeable { public boolean isInsert() { return isInsert; } + + public boolean hasNestedColumns() { + return 
hasNestedColumns; + } + + public void setHasNestedColumns(boolean hasNestedColumns) { + this.hasNestedColumns = hasNestedColumns; + } + + public void setSlotIdToAccessPathInfo(int slotId, AccessPathInfo accessPathInfo) { + this.slotIdToAcessPathInfo.put(slotId, accessPathInfo); + } + + public Optional<AccessPathInfo> getAccessPathInfo(Slot slot) { + return Optional.ofNullable(this.slotIdToAcessPathInfo.get(slot.getExprId().asInt())); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index e3f9faedf65..2e8b26b41a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.glue.translator; +import org.apache.doris.analysis.AccessPathInfo; import org.apache.doris.analysis.AggregateInfo; import org.apache.doris.analysis.AnalyticWindow; import org.apache.doris.analysis.BinaryPredicate; @@ -74,6 +75,7 @@ import org.apache.doris.fs.TransactionScopeCachingDirectoryListerFactory; import org.apache.doris.info.BaseTableRefInfo; import org.apache.doris.info.TableNameInfo; import org.apache.doris.info.TableRefInfo; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.processor.post.runtimefilterv2.RuntimeFilterV2; import org.apache.doris.nereids.properties.DistributionSpec; @@ -172,6 +174,7 @@ import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.NestedColumnPrunable; import org.apache.doris.nereids.types.StructType; import 
org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.JoinUtils; @@ -246,6 +249,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; @@ -826,10 +830,26 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla OlapTable olapTable = olapScan.getTable(); // generate real output tuple TupleDescriptor tupleDescriptor = generateTupleDesc(slots, olapTable, context); + StatementContext statementContext = context.getStatementContext(); + + List<SlotDescriptor> slotDescriptors = tupleDescriptor.getSlots(); + for (int i = 0; i < slots.size(); i++) { + Slot slot = slots.get(i); + if (slot.getDataType() instanceof NestedColumnPrunable) { + Optional<AccessPathInfo> accessPathInfo = statementContext.getAccessPathInfo(slot); + if (accessPathInfo.isPresent()) { + SlotDescriptor slotDescriptor = slotDescriptors.get(i); + AccessPathInfo accessPath = accessPathInfo.get(); + slotDescriptor.setType(accessPath.getPrunedType().toCatalogDataType()); + slotDescriptor.setAllAccessPaths(accessPath.getAllAccessPaths()); + slotDescriptor.setPredicateAccessPaths(accessPath.getPredicateAccessPaths()); + } + } + } // put virtual column expr into slot desc Map<ExprId, Expression> slotToVirtualColumnMap = olapScan.getSlotToVirtualColumnMap(); - for (SlotDescriptor slotDescriptor : tupleDescriptor.getSlots()) { + for (SlotDescriptor slotDescriptor : slotDescriptors) { ExprId exprId = context.findExprId(slotDescriptor.getId()); if (slotToVirtualColumnMap.containsKey(exprId)) { slotDescriptor.setVirtualColumn(ExpressionTranslator.translate( @@ -899,7 +919,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla && !isComplexDataType(slot.getDataType()) && !StatisticConstants.isSystemTable(olapTable) && !inVisibleCol) { - 
context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId()); + context.addUnknownStatsColumn(olapScanNode, slotDescriptors.get(i).getId()); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index 89e1da82950..1dd79a033cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.IdGenerator; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.processor.post.TopnFilterContext; import org.apache.doris.nereids.processor.post.runtimefilterv2.RuntimeFilterContextV2; import org.apache.doris.nereids.trees.expressions.CTEId; @@ -67,6 +68,7 @@ import javax.annotation.Nullable; */ public class PlanTranslatorContext { private final ConnectContext connectContext; + private final StatementContext statementContext; private final List<PlanFragment> planFragments = Lists.newArrayList(); private DescriptorTable descTable; @@ -119,16 +121,20 @@ public class PlanTranslatorContext { private final Set<SlotId> virtualColumnIds = Sets.newHashSet(); + /** PlanTranslatorContext */ public PlanTranslatorContext(CascadesContext ctx) { this.connectContext = ctx.getConnectContext(); + this.statementContext = ctx.getStatementContext(); this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); this.topnFilterContext = ctx.getTopnFilterContext(); this.runtimeFilterV2Context = ctx.getRuntimeFilterV2Context(); this.descTable = new DescriptorTable(); } + /** PlanTranslatorContext */ public PlanTranslatorContext(CascadesContext ctx, DescriptorTable 
descTable) { this.connectContext = ctx.getConnectContext(); + this.statementContext = ctx.getStatementContext(); this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); this.topnFilterContext = ctx.getTopnFilterContext(); this.runtimeFilterV2Context = ctx.getRuntimeFilterV2Context(); @@ -141,6 +147,7 @@ public class PlanTranslatorContext { @VisibleForTesting public PlanTranslatorContext() { this.connectContext = null; + this.statementContext = new StatementContext(); this.translator = null; this.topnFilterContext = new TopnFilterContext(); IdGenerator<RuntimeFilterId> runtimeFilterIdGen = RuntimeFilterId.createGenerator(); @@ -180,6 +187,10 @@ public class PlanTranslatorContext { return connectContext; } + public StatementContext getStatementContext() { + return statementContext; + } + public Set<ScanNode> getScanNodeWithUnknownColumnStats() { return statsUnknownColumnsMap.keySet(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index f3f998bce89..820b6ec0453 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -109,6 +109,7 @@ import org.apache.doris.nereids.rules.rewrite.MergeProjectable; import org.apache.doris.nereids.rules.rewrite.MergeSetOperations; import org.apache.doris.nereids.rules.rewrite.MergeSetOperationsExcept; import org.apache.doris.nereids.rules.rewrite.MergeTopNs; +import org.apache.doris.nereids.rules.rewrite.NestedColumnCollector; import org.apache.doris.nereids.rules.rewrite.NormalizeSort; import org.apache.doris.nereids.rules.rewrite.OperativeColumnDerive; import org.apache.doris.nereids.rules.rewrite.OrExpansion; @@ -906,6 +907,11 @@ public class Rewriter extends AbstractBatchJobExecutor { ) )); } + rewriteJobs.add( + topic("nested column prune", + 
custom(RuleType.NESTED_COLUMN_PRUNING, NestedColumnCollector::new) + ) + ); rewriteJobs.addAll(jobs( topic("rewrite cte sub-tree after sub path push down", custom(RuleType.CLEAR_CONTEXT_STATUS, ClearContextStatus::new), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 37e357d8f0d..163c334290a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -218,6 +218,7 @@ public enum RuleType { ADD_PROJECT_FOR_JOIN(RuleTypeClass.REWRITE), ADD_PROJECT_FOR_UNIQUE_FUNCTION(RuleTypeClass.REWRITE), VARIANT_SUB_PATH_PRUNING(RuleTypeClass.REWRITE), + NESTED_COLUMN_PRUNING(RuleTypeClass.REWRITE), CLEAR_CONTEXT_STATUS(RuleTypeClass.REWRITE), COLUMN_PRUNING(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnCollector.java new file mode 100644 index 00000000000..d95ace059b4 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnCollector.java @@ -0,0 +1,520 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.analysis.AccessPathInfo; +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ArrayMap; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MapKeys; +import org.apache.doris.nereids.trees.expressions.functions.scalar.MapValues; +import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.NestedColumnPrunable; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StructField; +import org.apache.doris.nereids.types.StructType; +import 
org.apache.doris.nereids.util.Utils; +import org.apache.doris.thrift.TAccessPathType; +import org.apache.doris.thrift.TColumnAccessPaths; +import org.apache.doris.thrift.TColumnNameAccessPath; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; +import org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; + +/** NestedColumnCollector */ +public class NestedColumnCollector implements CustomRewriter { + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + StatementContext statementContext = jobContext.getCascadesContext().getStatementContext(); + // if (!statementContext.hasNestedColumns()) { + // return plan; + // } + + AccessPathCollector collector = new AccessPathCollector(); + List<AccessPathIsPredicate> slotToAccessPaths = collector.collectInPlan(plan, statementContext); + Map<Integer, AccessPathInfo> slotToResult = pruneDataType(slotToAccessPaths); + for (Entry<Integer, AccessPathInfo> kv : slotToResult.entrySet()) { + Integer slotId = kv.getKey(); + statementContext.setSlotIdToAccessPathInfo(slotId, kv.getValue()); + } + return plan; + } + + private static Map<Integer, AccessPathInfo> pruneDataType(List<AccessPathIsPredicate> slotToAccessPaths) { + Map<Integer, AccessPathInfo> result = new LinkedHashMap<>(); + Map<Slot, DataTypeAccessTree> slotIdToAllAccessTree = new LinkedHashMap<>(); + Map<Slot, DataTypeAccessTree> slotIdToPredicateAccessTree = new LinkedHashMap<>(); + + Comparator<List<String>> pathComparator + = Comparator.comparing(a -> StringUtils.join(a, ".")); + + Multimap<Integer, List<String>> allAccessPaths = TreeMultimap.create( + Comparator.naturalOrder(), pathComparator); + Multimap<Integer, List<String>> predicateAccessPaths = 
TreeMultimap.create( + Comparator.naturalOrder(), pathComparator); + + // first: build access data type tree + for (AccessPathIsPredicate accessPathIsPredicate : slotToAccessPaths) { + Slot slot = accessPathIsPredicate.slot; + List<String> path = accessPathIsPredicate.path; + + DataTypeAccessTree allAccessTree = slotIdToAllAccessTree.computeIfAbsent( + slot, i -> DataTypeAccessTree.ofRoot(slot) + ); + allAccessTree.setAccessByPath(path, 0); + allAccessPaths.put(slot.getExprId().asInt(), path); + + if (accessPathIsPredicate.isPredicate()) { + DataTypeAccessTree predicateAccessTree = slotIdToPredicateAccessTree.computeIfAbsent( + slot, i -> DataTypeAccessTree.ofRoot(slot) + ); + predicateAccessTree.setAccessByPath(path, 0); + predicateAccessPaths.put(slot.getExprId().asInt(), path); + } + } + + // second: build non-predicate access paths + for (Entry<Slot, DataTypeAccessTree> kv : slotIdToAllAccessTree.entrySet()) { + Slot slot = kv.getKey(); + DataTypeAccessTree accessTree = kv.getValue(); + DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType()); + + List<TColumnNameAccessPath> allPaths = new ArrayList<>(); + for (List<String> path : allAccessPaths.get(slot.getExprId().asInt())) { + if (path == null) { + throw new AnalysisException("This is a bug, please report this"); + } + TColumnNameAccessPath tPath = new TColumnNameAccessPath(); + tPath.setPath(path); + // only retain access the whole root + if (path.size() == 1) { + allPaths = ImmutableList.of(tPath); + break; + } + allPaths.add(tPath); + } + TColumnAccessPaths allPath = new TColumnAccessPaths(); + allPath.type = TAccessPathType.NAME; + allPath.setNameAccessPaths(allPaths); + + TColumnAccessPaths predicatePath = new TColumnAccessPaths(); + predicatePath.type = TAccessPathType.NAME; + predicatePath.setNameAccessPaths(new ArrayList<>()); + result.put(slot.getExprId().asInt(), new AccessPathInfo(prunedDataType, allPath, predicatePath)); + } + + // third: build predicate access path + for 
(Entry<Slot, DataTypeAccessTree> kv : slotIdToPredicateAccessTree.entrySet()) { + Slot slot = kv.getKey(); + + List<TColumnNameAccessPath> predicatePaths = new ArrayList<>(); + for (List<String> path : predicateAccessPaths.get(slot.getExprId().asInt())) { + if (path == null) { + throw new AnalysisException("This is a bug, please report this"); + } + TColumnNameAccessPath tPath = new TColumnNameAccessPath(); + tPath.setPath(path); + // only retain access the whole root + if (path.size() == 1) { + predicatePaths = ImmutableList.of(tPath); + break; + } + predicatePaths.add(tPath); + } + + AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt()); + accessPathInfo.getPredicateAccessPaths().name_access_paths.addAll(predicatePaths); + } + + return result; + } + + private class AccessPathCollector extends DefaultExpressionVisitor<Void, CollectorContext> { + private List<AccessPathIsPredicate> slotToAccessPaths = new ArrayList<>(); + + public List<AccessPathIsPredicate> collectInPlan( + Plan plan, StatementContext statementContext) { + boolean bottomFilter = plan instanceof LogicalFilter && plan.child(0).arity() == 0; + for (Expression expression : plan.getExpressions()) { + expression.accept(this, new CollectorContext(statementContext, bottomFilter)); + } + for (Plan child : plan.children()) { + collectInPlan(child, statementContext); + } + return slotToAccessPaths; + } + + private Void continueCollectAccessPath(Expression expr, CollectorContext context) { + return expr.accept(this, context); + } + + @Override + public Void visit(Expression expr, CollectorContext context) { + for (Expression child : expr.children()) { + child.accept(this, new CollectorContext(context.statementContext, context.bottomFilter)); + } + return null; + } + + @Override + public Void visitSlotReference(SlotReference slotReference, CollectorContext context) { + DataType dataType = slotReference.getDataType(); + if (dataType instanceof NestedColumnPrunable) { + 
context.accessPathBuilder.addPrefix(slotReference.getName()); + ImmutableList<String> path = Utils.fastToImmutableList(context.accessPathBuilder.accessPath); + slotToAccessPaths.add(new AccessPathIsPredicate(slotReference, path, context.bottomFilter)); + } + return null; + } + + @Override + public Void visitAlias(Alias alias, CollectorContext context) { + return alias.child(0).accept(this, context); + } + + @Override + public Void visitCast(Cast cast, CollectorContext context) { + return cast.child(0).accept(this, context); + } + + // array element at + @Override + public Void visitElementAt(ElementAt elementAt, CollectorContext context) { + List<Expression> arguments = elementAt.getArguments(); + Expression first = arguments.get(0); + if (first.getDataType().isArrayType() || first.getDataType().isMapType() + || first.getDataType().isVariantType()) { + context.accessPathBuilder.addPrefix("*"); + continueCollectAccessPath(first, context); + + for (int i = 1; i < arguments.size(); i++) { + visit(arguments.get(i), context); + } + return null; + } else { + return visit(elementAt, context); + } + } + + // struct element_at + @Override + public Void visitStructElement(StructElement structElement, CollectorContext context) { + List<Expression> arguments = structElement.getArguments(); + Expression struct = arguments.get(0); + Expression fieldName = arguments.get(1); + DataType fieldType = fieldName.getDataType(); + + if (fieldName.isLiteral() && (fieldType.isIntegerLikeType() || fieldType.isStringLikeType())) { + context.accessPathBuilder.addPrefix(((Literal) fieldName).getStringValue()); + return continueCollectAccessPath(struct, context); + } + + for (Expression argument : arguments) { + visit(argument, context); + } + return null; + } + + @Override + public Void visitMapKeys(MapKeys mapKeys, CollectorContext context) { + context.accessPathBuilder.addPrefix("KEYS"); + return continueCollectAccessPath(mapKeys.getArgument(0), context); + } + + @Override + public Void 
visitMapValues(MapValues mapValues, CollectorContext context) { + LinkedList<String> suffixPath = context.accessPathBuilder.accessPath; + if (!suffixPath.isEmpty() && suffixPath.get(0).equals("*")) { + CollectorContext removeStarContext + = new CollectorContext(context.statementContext, context.bottomFilter); + removeStarContext.accessPathBuilder.accessPath.addAll(suffixPath.subList(1, suffixPath.size())); + removeStarContext.accessPathBuilder.addPrefix("VALUES"); + return continueCollectAccessPath(mapValues.getArgument(0), removeStarContext); + } + context.accessPathBuilder.addPrefix("VALUES"); + return continueCollectAccessPath(mapValues.getArgument(0), context); + } + + @Override + public Void visitMapContainsKey(MapContainsKey mapContainsKey, CollectorContext context) { + context.accessPathBuilder.addPrefix("KEYS"); + return continueCollectAccessPath(mapContainsKey.getArgument(0), context); + } + + @Override + public Void visitMapContainsValue(MapContainsValue mapContainsValue, CollectorContext context) { + context.accessPathBuilder.addPrefix("VALUES"); + return continueCollectAccessPath(mapContainsValue.getArgument(0), context); + } + + @Override + public Void visitArrayMap(ArrayMap arrayMap, CollectorContext context) { + // Lambda lambda = (Lambda) arrayMap.getArgument(0); + // Expression array = arrayMap.getArgument(1); + + // String arrayName = lambda.getLambdaArgumentName(0); + return super.visitArrayMap(arrayMap, context); + } + } + + private static class CollectorContext { + private StatementContext statementContext; + private AccessPathBuilder accessPathBuilder; + private boolean bottomFilter; + + public CollectorContext(StatementContext statementContext, boolean bottomFilter) { + this.statementContext = statementContext; + this.accessPathBuilder = new AccessPathBuilder(); + this.bottomFilter = bottomFilter; + } + } + + private static class AccessPathBuilder { + private LinkedList<String> accessPath; + + public AccessPathBuilder() { + accessPath = new 
LinkedList<>(); + } + + public AccessPathBuilder addPrefix(String prefix) { + accessPath.addFirst(prefix); + return this; + } + + public List<String> toStringList() { + return new ArrayList<>(accessPath); + } + + @Override + public String toString() { + return String.join(".", accessPath); + } + } + + private static class DataTypeAccessTree { + private DataType type; + private boolean isRoot; + private boolean accessPartialChild; + private boolean accessAll; + private Map<String, DataTypeAccessTree> children = new LinkedHashMap<>(); + + public DataTypeAccessTree(DataType type) { + this(false, type); + } + + public DataTypeAccessTree(boolean isRoot, DataType type) { + this.isRoot = isRoot; + this.type = type; + } + + public void setAccessByPath(List<String> path, int accessIndex) { + if (accessIndex >= path.size()) { + accessAll = true; + return; + } else { + accessPartialChild = true; + } + + if (type.isStructType()) { + String fieldName = path.get(accessIndex); + DataTypeAccessTree child = children.get(fieldName); + child.setAccessByPath(path, accessIndex + 1); + return; + } else if (type.isArrayType()) { + DataTypeAccessTree child = children.get("*"); + if (path.get(accessIndex).equals("*")) { + // enter this array and skip next * + child.setAccessByPath(path, accessIndex + 1); + } + return; + } else if (type.isMapType()) { + String fieldName = path.get(accessIndex); + if (fieldName.equals("*")) { + // access value by the key, so we should access key and access value, then prune the value's type. + // e.g. map_column['id'] should access the keys, and access the values + DataTypeAccessTree keysChild = children.get("KEYS"); + DataTypeAccessTree valuesChild = children.get("VALUES"); + keysChild.accessAll = true; + valuesChild.setAccessByPath(path, accessIndex + 1); + return; + } else if (fieldName.equals("KEYS")) { + // only access the keys and not need enter keys, because it must be primitive type. + // e.g. 
map_keys(map_column) + DataTypeAccessTree keysChild = children.get("KEYS"); + keysChild.accessAll = true; + return; + } else if (fieldName.equals("VALUES")) { + // only access the values without keys, and maybe prune the value's data type. + // e.g. map_values(map_columns)[0] will access the array of values first, + // and then access the array, so the access path is ['VALUES', '*'] + DataTypeAccessTree valuesChild = children.get("VALUES"); + valuesChild.setAccessByPath(path, accessIndex + 1); + return; + } + } else if (isRoot) { + children.get(path.get(accessIndex)).setAccessByPath(path, accessIndex + 1); + return; + } + throw new AnalysisException("unsupported data type: " + type); + } + + public static DataTypeAccessTree ofRoot(Slot slot) { + DataTypeAccessTree child = of(slot.getDataType()); + DataTypeAccessTree root = new DataTypeAccessTree(true, NullType.INSTANCE); + root.children.put(slot.getName(), child); + return root; + } + + public static DataTypeAccessTree of(DataType type) { + DataTypeAccessTree root = new DataTypeAccessTree(type); + if (type instanceof StructType) { + StructType structType = (StructType) type; + for (Entry<String, StructField> kv : structType.getNameToFields().entrySet()) { + root.children.put(kv.getKey(), of(kv.getValue().getDataType())); + } + } else if (type instanceof ArrayType) { + root.children.put("*", of(((ArrayType) type).getItemType())); + } else if (type instanceof MapType) { + root.children.put("KEYS", of(((MapType) type).getKeyType())); + root.children.put("VALUES", of(((MapType) type).getValueType())); + } + return root; + } + + public Optional<DataType> pruneDataType() { + if (isRoot) { + return children.values().iterator().next().pruneDataType(); + } else if (accessAll) { + return Optional.of(type); + } else if (!accessPartialChild) { + return Optional.empty(); + } + + List<Pair<String, DataType>> accessedChildren = new ArrayList<>(); + + if (type instanceof StructType) { + for (Entry<String, DataTypeAccessTree> kv : 
children.entrySet()) { + DataTypeAccessTree childTypeTree = kv.getValue(); + Optional<DataType> childDataType = childTypeTree.pruneDataType(); + if (childDataType.isPresent()) { + accessedChildren.add(Pair.of(kv.getKey(), childDataType.get())); + } + } + } else if (type instanceof ArrayType) { + Optional<DataType> childDataType = children.get("*").pruneDataType(); + if (childDataType.isPresent()) { + accessedChildren.add(Pair.of("*", childDataType.get())); + } + } else if (type instanceof MapType) { + DataType prunedValueType = children.get("VALUES") + .pruneDataType() + .orElse(((MapType) type).getValueType()); + // can not prune keys but can prune values + accessedChildren.add(Pair.of("KEYS", ((MapType) type).getKeyType())); + accessedChildren.add(Pair.of("VALUES", prunedValueType)); + } + if (accessedChildren.isEmpty()) { + return Optional.of(type); + } + + return Optional.of(pruneDataType(type, accessedChildren)); + } + + private DataType pruneDataType(DataType dataType, List<Pair<String, DataType>> newChildrenTypes) { + if (dataType instanceof StructType) { + // prune struct fields + StructType structType = (StructType) dataType; + Map<String, StructField> nameToFields = structType.getNameToFields(); + List<StructField> newFields = new ArrayList<>(); + for (Pair<String, DataType> kv : newChildrenTypes) { + String fieldName = kv.first; + StructField originField = nameToFields.get(fieldName); + DataType prunedType = kv.second; + newFields.add(new StructField( + originField.getName(), prunedType, originField.isNullable(), originField.getComment() + )); + } + return new StructType(newFields); + } else if (dataType instanceof ArrayType) { + return ArrayType.of(newChildrenTypes.get(0).second, ((ArrayType) dataType).containsNull()); + } else if (dataType instanceof MapType) { + return MapType.of(newChildrenTypes.get(0).second, newChildrenTypes.get(1).second); + } else { + throw new AnalysisException("unsupported data type: " + dataType); + } + } + } + + private 
static class AccessPathIsPredicate { /* Value holder for a slot, one access path into it and an isPredicate flag; all three fields are final. NOTE(review): isPredicate presumably marks paths collected from filter predicates - confirm against the collector. */ + private final Slot slot; + private final List<String> path; + private final boolean isPredicate; + /** + * Stores the three arguments as given; the path list is kept by reference, not copied. + */ + public AccessPathIsPredicate(Slot slot, List<String> path, boolean isPredicate) { + this.slot = slot; + this.path = path; + this.isPredicate = isPredicate; + } + + public Slot getSlot() { + return slot; + } + + public List<String> getPath() { + return path; + } + + public boolean isPredicate() { + return isPredicate; + } + /** + * Formats as: slot name, then ": ", then the path components joined with ".", + * then ", " and the isPredicate flag. + */ + @Override + public String toString() { + return slot.getName() + ": " + String.join(".", path) + ", " + isPredicate; + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index f2a11017a71..37b6b6233bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -740,8 +740,8 @@ public interface ScalarFunctionVisitor<R, C> { return visitScalarFunction(arrayFlatten, context); } - default R visitArrayMap(ArrayMap arraySort, C context) { - return visitScalarFunction(arraySort, context); + default R visitArrayMap(ArrayMap arrayMap, C context) { + return visitScalarFunction(arrayMap, context); } default R visitArrayMatchAll(ArrayMatchAll arrayMatchAll, C context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/ArrayType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/ArrayType.java index 9a34ab740d3..6d4ec539ff3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/ArrayType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/ArrayType.java @@ -25,7 +25,7 @@ import java.util.Objects; /** * Array type in Nereids. 
*/ -public class ArrayType extends DataType implements ComplexDataType { +public class ArrayType extends DataType implements ComplexDataType, NestedColumnPrunable { public static final ArrayType SYSTEM_DEFAULT = new ArrayType(NullType.INSTANCE, true); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/MapType.java index fb8346987f7..176c1db1d0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/MapType.java @@ -27,7 +27,7 @@ import java.util.Objects; * Struct type in Nereids. */ @Developing -public class MapType extends DataType implements ComplexDataType { +public class MapType extends DataType implements ComplexDataType, NestedColumnPrunable { public static final MapType SYSTEM_DEFAULT = new MapType(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/NestedColumnPrunable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/NestedColumnPrunable.java new file mode 100644 index 00000000000..2d5e464f042 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/NestedColumnPrunable.java @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.types; + +/** + * Marker interface: it declares no methods or constants. + * NOTE(review): presumably tags the data types that nested column pruning may be applied to; + * confirm against the rule that checks for it. + */ +public interface NestedColumnPrunable { +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructType.java index ffbff7c61e1..a1c263976f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/StructType.java @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -35,7 +36,7 @@ import java.util.stream.Collectors; * Struct type in Nereids. */ @Developing -public class StructType extends DataType implements ComplexDataType { +public class StructType extends DataType implements ComplexDataType, NestedColumnPrunable { public static final StructType SYSTEM_DEFAULT = new StructType(); @@ -55,7 +56,8 @@ public class StructType extends DataType implements ComplexDataType { public StructType(List<StructField> fields) { this.fields = ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be null")); // field name should be lowercase and check the same or not - this.nameToFields = new HashMap<>(); + // ATTN: should use LinkedHashMap to keep order + this.nameToFields = new LinkedHashMap<>(); for (StructField field : this.fields) { String fieldName = field.getName().toLowerCase(); StructField existingField = this.nameToFields.put(fieldName, field); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java index 7a5ff6d72d3..4cc3bfccf3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java @@ -36,7 +36,7 @@ import java.util.stream.Collectors; * Example: VARIANT <`a.b`:INT, `a.c`:DATETIMEV2> * */ -public class VariantType extends PrimitiveType { +public class VariantType extends PrimitiveType implements NestedColumnPrunable { public static final VariantType INSTANCE = new VariantType(0); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumn.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumn.java new file mode 100644 index 00000000000..7cccbf18496 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumn.java @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.catalog.Type; +import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.planner.OlapScanNode; +import org.apache.doris.planner.PlanFragment; +import org.apache.doris.thrift.TAccessPathType; +import org.apache.doris.thrift.TColumnNameAccessPath; +import org.apache.doris.utframe.TestWithFeService; + +import com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.TreeSet; + /** + * Plans queries against table tbl, which has an int column id and a nested struct column s, + * and checks the type and the access paths recorded on the complex slot of the olap scan. + * The fixture creates database test and table tbl once, and disables the + * PRUNE_EMPTY_PARTITION rule for the session (presumably so that the scan of the + * empty table is not removed from the plan - confirm). + */ +public class PruneNestedColumn extends TestWithFeService { + @BeforeAll + public void createTable() throws Exception { + createDatabase("test"); + useDatabase("test"); + + createTable("create table tbl(\n" + + " id int,\n" + + " s struct<\n" + + " city: string,\n" + + " data: array<map<\n" + + " int,\n" + + " struct<a: int, b: double>\n" + + " >>\n" + + ">)\n" + + "properties ('replication_num'='1')"); + + connectContext.getSessionVariable().setDisableNereidsRules(RuleType.PRUNE_EMPTY_PARTITION.name()); + } + /** + * Projection-only queries: every case that reaches the path checks expects an empty + * predicate path list. The first case passes null for all expectations, which means + * that no complex slot may be scanned at all. + */ + @Test + public void testProject() throws Exception { + assertColumn("select 100 from tbl", null, null, null); + assertColumn("select s from tbl", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s")), + ImmutableList.of() + ); + assertColumn("select struct_element(s, 'city'), s from tbl", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s")), + ImmutableList.of() + ); + assertColumn("select struct_element(s, 'city') from tbl", + "struct<city:text>", + ImmutableList.of(path("s", "city")), + ImmutableList.of() + ); + assertColumn("select struct_element(s, 'data') from tbl", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + 
ImmutableList.of(path("s", "data")), + ImmutableList.of() + ); + assertColumn("select struct_element(s, 'data')[1] from tbl", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*")), + ImmutableList.of() + ); + assertColumn("select map_keys(struct_element(s, 'data')[1]) from tbl", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "KEYS")), + ImmutableList.of() + ); + assertColumn("select map_values(struct_element(s, 'data')[1]) from tbl", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "VALUES")), + ImmutableList.of() + ); + assertColumn("select struct_element(map_values(struct_element(s, 'data')[1])[1], 'a') from tbl", + "struct<data:array<map<int,struct<a:int>>>>", + ImmutableList.of(path("s", "data", "*", "VALUES", "a")), + ImmutableList.of() + ); + assertColumn("select struct_element(s, 'data')[1][1] from tbl", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "*")), + ImmutableList.of() + ); + assertColumn("select struct_element(struct_element(s, 'data')[1][1], 'a') from tbl", + "struct<data:array<map<int,struct<a:int>>>>", + ImmutableList.of(path("s", "data", "*", "*", "a")), + ImmutableList.of() + ); + assertColumn("select struct_element(struct_element(s, 'data')[1][1], 'b') from tbl", + "struct<data:array<map<int,struct<b:double>>>>", + ImmutableList.of(path("s", "data", "*", "*", "b")), + ImmutableList.of() + ); + // assertColumn("select struct_element(struct_element(s, 'data')[1][1], 'b') from tbl where struct_element(s, 'city')='beijing", + // "struct<data:array<map<int,struct<b:double>>>>", + // predicatePath("city"), + // path("data", "*", "*", "b") + // ); + + // assertColumn("select array_map(x -> x[2], struct_element(s, 'data')) from tbl", "struct<data:array<map<int,struct<a:int,b:double>>>>", path("data")); + // assertColumn("select array_map(x -> 
struct_element(x[2], 'b'), struct_element(s, 'data')) from tbl", "struct<data:array<map<int,struct<b:double>>>>", path("data", "*", "*", "b")); + } + /** + * Filter-only queries: each case expects the same paths in the all-access set + * and in the predicate-access set. + */ + @Test + public void testFilter() throws Throwable { + assertColumn("select 100 from tbl where s is not null", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s")) + ); + + assertColumn("select 100 from tbl where if(id = 1, null, s) is not null and struct_element(s, 'city') = 'beijing'", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s")) + ); + + assertColumn("select 100 from tbl where struct_element(s, 'city') is not null", + "struct<city:text>", + ImmutableList.of(path("s", "city")), + ImmutableList.of(path("s", "city")) + ); + + assertColumn("select 100 from tbl where struct_element(s, 'data') is not null", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data")), + ImmutableList.of(path("s", "data")) + ); + assertColumn("select 100 from tbl where struct_element(s, 'data')[1] is not null", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*")), + ImmutableList.of(path("s", "data", "*")) + ); + assertColumn("select 100 from tbl where map_keys(struct_element(s, 'data')[1]) is not null", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "KEYS")), + ImmutableList.of(path("s", "data", "*", "KEYS")) + ); + assertColumn("select 100 from tbl where map_values(struct_element(s, 'data')[1]) is not null", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "VALUES")), + ImmutableList.of(path("s", "data", "*", "VALUES")) + ); + assertColumn("select 100 from tbl where struct_element(map_values(struct_element(s, 'data')[1])[1], 'a') is not null", + "struct<data:array<map<int,struct<a:int>>>>", + 
ImmutableList.of(path("s", "data", "*", "VALUES", "a")), + ImmutableList.of(path("s", "data", "*", "VALUES", "a")) + ); + assertColumn("select 100 from tbl where struct_element(s, 'data')[1][1] is not null", + "struct<data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data", "*", "*")), + ImmutableList.of(path("s", "data", "*", "*")) + ); + assertColumn("select 100 from tbl where struct_element(struct_element(s, 'data')[1][1], 'a') is not null", + "struct<data:array<map<int,struct<a:int>>>>", + ImmutableList.of(path("s", "data", "*", "*", "a")), + ImmutableList.of(path("s", "data", "*", "*", "a")) + ); + assertColumn("select 100 from tbl where struct_element(struct_element(s, 'data')[1][1], 'b') is not null", + "struct<data:array<map<int,struct<b:double>>>>", + ImmutableList.of(path("s", "data", "*", "*", "b")), + ImmutableList.of(path("s", "data", "*", "*", "b")) + ); + } + /** + * Queries that both project from and filter on column s: the all-access set is expected + * to cover the projected and the filtered paths, the predicate set only the filtered ones. + */ + @Test + public void testProjectFilter() throws Throwable { + assertColumn("select s from tbl where struct_element(s, 'city') is not null", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s")), + ImmutableList.of(path("s", "city")) + ); + + assertColumn("select struct_element(s, 'data') from tbl where struct_element(s, 'city') is not null", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data"), path("s", "city")), + ImmutableList.of(path("s", "city")) + ); + + assertColumn("select struct_element(s, 'data') from tbl where struct_element(s, 'city') is not null and struct_element(s, 'data') is not null", + "struct<city:text,data:array<map<int,struct<a:int,b:double>>>>", + ImmutableList.of(path("s", "data"), path("s", "city")), + ImmutableList.of(path("s", "data"), path("s", "city")) + ); + } + /** + * Plans the statement and checks the complex or variant slots found in its olap scans. + * With a null expectType it asserts that there is no such slot and returns before the + * path lists are touched, so they may be null in that case. Otherwise it asserts exactly + * one slot, compares its type string with expectType, and checks that both access-path + * holders have type NAME and contain the expected paths. Paths are compared as TreeSets, + * so neither order nor duplicates matter. + */ + private void assertColumn(String sql, String expectType, + List<TColumnNameAccessPath> expectAllAccessPaths, + List<TColumnNameAccessPath> expectPredicateAccessPaths) throws Exception { 
+ List<SlotDescriptor> slotDescriptors = collectComplexSlots(sql); + if (expectType == null) { + Assertions.assertEquals(0, slotDescriptors.size()); + return; + } + + Assertions.assertEquals(1, slotDescriptors.size()); + Assertions.assertEquals(expectType, slotDescriptors.get(0).getType().toString()); + + Assertions.assertEquals(TAccessPathType.NAME, slotDescriptors.get(0).getAllAccessPaths().type); + TreeSet<TColumnNameAccessPath> expectAllAccessPathSet = new TreeSet<>(expectAllAccessPaths); + TreeSet<TColumnNameAccessPath> actualAllAccessPaths + = new TreeSet<>(slotDescriptors.get(0).getAllAccessPaths().name_access_paths); + Assertions.assertEquals(expectAllAccessPathSet, actualAllAccessPaths); + + Assertions.assertEquals(TAccessPathType.NAME, slotDescriptors.get(0).getPredicateAccessPaths().type); + TreeSet<TColumnNameAccessPath> expectPredicateAccessPathSet = new TreeSet<>(expectPredicateAccessPaths); + TreeSet<TColumnNameAccessPath> actualPredicateAccessPaths + = new TreeSet<>(slotDescriptors.get(0).getPredicateAccessPaths().name_access_paths); + Assertions.assertEquals(expectPredicateAccessPathSet, actualPredicateAccessPaths); + } + /** + * Plans the statement and returns, across all plan fragments, every slot of every + * OlapScanNode whose type reports isComplexType() or isVariantType(). + */ + private List<SlotDescriptor> collectComplexSlots(String sql) throws Exception { + NereidsPlanner planner = (NereidsPlanner) getSqlStmtExecutor(sql).planner(); + List<SlotDescriptor> complexSlots = new ArrayList<>(); + for (PlanFragment fragment : planner.getFragments()) { + List<OlapScanNode> olapScanNodes = fragment.getPlanRoot().collectInCurrentFragment(OlapScanNode.class::isInstance); + for (OlapScanNode olapScanNode : olapScanNodes) { + List<SlotDescriptor> slots = olapScanNode.getTupleDesc().getSlots(); + for (SlotDescriptor slot : slots) { + Type type = slot.getType(); + if (type.isComplexType() || type.isVariantType()) { + complexSlots.add(slot); + } + } + } + } + return complexSlots; + } + /** + * Builds a TColumnNameAccessPath from the given path components, in the given order. + */ + private TColumnNameAccessPath path(String... 
path) { + return new TColumnNameAccessPath(ImmutableList.copyOf(path)); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java index 14e13520620..bbe3885b637 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java @@ -558,8 +558,8 @@ public abstract class TestWithFeService { && connectContext.getState().getErrorCode() == null) { return stmtExecutor; } else { - // throw new IllegalStateException(connectContext.getState().getErrorMessage()); - return null; + throw new IllegalStateException(connectContext.getState().getErrorMessage()); + // return null; } } diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift index d45e919aae1..5276f9df4cb 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -77,7 +77,8 @@ struct TSlotDescriptor { 16: optional string col_default_value 17: optional Types.TPrimitiveType primitive_type = Types.TPrimitiveType.INVALID_TYPE 18: optional Exprs.TExpr virtual_column_expr - 19: optional TColumnAccessPaths column_access_paths + 19: optional TColumnAccessPaths all_access_paths + 20: optional TColumnAccessPaths predicate_access_paths } enum TAccessPathType { @@ -106,19 +107,17 @@ struct TColumnNameAccessPath { // > // > // > - // if we want to access `map_keys(s.data[0])`, the path will be: ['data', '*', 'KEYS'], - // if we want to access `map_values(s.data[0])[0].b`, the path will be: ['data', '*', 'VALUES', 'b'], - // if we want to access `s.data[0]['k'].b`, the path will be ['data', '*', '*', 'b'] - // if we want to access the whole struct of s, the path will be: [], + // if we want to access `map_keys(s.data[0])`, the path will be: ['s', 'data', '*', 'KEYS'], + // if we want to access `map_values(s.data[0])[0].b`, the path will be: ['s', 'data', '*', 'VALUES', 
'b'], + // if we want to access `s.data[0]['k'].b`, the path will be ['s', 'data', '*', '*', 'b'] + // if we want to access the whole struct of s, the path will be: ['s'], 1: required list<string> path - 2: required bool is_predicate } /* // implement in the future struct TIcebergColumnAccessPath { 1: required list<i64> path - 2: required bool is_predicate } */ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org