This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9848cd84be6ed07fe542b82d2e2628e658690621 Author: Steve Carlin <[email protected]> AuthorDate: Thu Mar 28 12:19:34 2024 -0700 IMPALA-12954: Implement Sorting capability for Calcite planner The Sort RelNode is now supported. This includes limit an offset features as well. A minor bit of code was copied from the original planner, but it just involved decision making on which Sort PlanNode to call, so this probably doesn't need to be refactored. Change-Id: I747e107ed996862ef348f829deee47f0c0fc78d5 Reviewed-on: http://gerrit.cloudera.org:8080/21237 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../impala/calcite/functions/FunctionResolver.java | 5 + .../calcite/rel/node/ConvertToImpalaRelRules.java | 13 ++ .../impala/calcite/rel/node/ImpalaPlanRel.java | 1 + .../impala/calcite/rel/node/ImpalaSortRel.java | 236 +++++++++++++++++++++ .../impala/calcite/service/CalciteOptimizer.java | 1 + .../queries/QueryTest/calcite.test | 115 ++++++++++ 6 files changed, 371 insertions(+) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java index 22e8514a0..3101e94d3 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/functions/FunctionResolver.java @@ -48,6 +48,11 @@ public class FunctionResolver { public static Map<SqlKind, String> CALCITE_KIND_TO_IMPALA_FUNC = ImmutableMap.<SqlKind, String> builder() .put(SqlKind.EQUALS, "eq") + .put(SqlKind.GREATER_THAN, "gt") + .put(SqlKind.GREATER_THAN_OR_EQUAL, "ge") + .put(SqlKind.LESS_THAN, "lt") + .put(SqlKind.LESS_THAN_OR_EQUAL, "le") + .put(SqlKind.NOT_EQUALS, "ne") .build(); public static Function getFunction(String name, SqlKind kind, diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ConvertToImpalaRelRules.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ConvertToImpalaRelRules.java index 32f376199..25dd29d76 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ConvertToImpalaRelRules.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ConvertToImpalaRelRules.java @@ -21,6 +21,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rel.logical.LogicalValues; @@ -70,6 +71,18 @@ public class ConvertToImpalaRelRules { } } + public static class ImpalaSortRule extends RelOptRule { + public ImpalaSortRule() { + super(operand(LogicalSort.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalSort sort = call.rel(0); + call.transformTo(new ImpalaSortRel(sort)); + } + } + public static class ImpalaUnionRule extends RelOptRule { public ImpalaUnionRule() { diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaPlanRel.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaPlanRel.java index 1e11b7d13..31eaa391a 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaPlanRel.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaPlanRel.java @@ -38,6 +38,7 @@ public interface ImpalaPlanRel { FILTER, HDFSSCAN, PROJECT, + SORT, UNION, VALUES } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaSortRel.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaSortRel.java new file mode 100644 index 000000000..e4546306f --- /dev/null +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaSortRel.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.impala.calcite.rel.node; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.SlotDescriptor; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.ExprSubstitutionMap; +import org.apache.impala.analysis.SortInfo; +import org.apache.impala.common.AnalysisException; +import org.apache.impala.common.ImpalaException; +import org.apache.impala.planner.PlannerContext; +import org.apache.impala.planner.PlanNode; +import org.apache.impala.planner.SortNode; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * ImpalaSortRel + * + * IMPALA-13172: Optimizations for tryConvertToTopN needed. This should probably + * be made rule based and done in the optimization phase, but keeping track + * here because the Impala code was in the SortNode. + */ +public class ImpalaSortRel extends Sort + implements ImpalaPlanRel { + + protected static final Logger LOG = + LoggerFactory.getLogger(ImpalaSortRel.class.getName()); + + private final long limit_; + private final long offset_; + + public ImpalaSortRel(Sort sort) { + super(sort.getCluster(), sort.getTraitSet(), sort.getHints(), sort.getInput(), + sort.getCollation(), sort.offset, sort.fetch); + limit_ = this.fetch != null ? + ((BigDecimal) RexLiteral.value(this.fetch)).longValue() : -1L; + offset_ = this.offset != null ? + ((BigDecimal) RexLiteral.value(this.offset)).longValue() : 0L; + } + + private ImpalaSortRel(RelOptCluster cluster, RelTraitSet traitSet, + RelNode newInput, RelCollation newCollation, RexNode offset, RexNode fetch) { + super(cluster, traitSet, new ArrayList<>(), newInput, newCollation, offset, fetch); + limit_ = this.fetch != null ? + ((BigDecimal) RexLiteral.value(this.fetch)).longValue() : -1L; + offset_ = this.offset != null ? + ((BigDecimal) RexLiteral.value(this.offset)).longValue() : 0L; + } + + @Override + public Sort copy(RelTraitSet traitSet, RelNode newInput, RelCollation newCollation, + RexNode offset, RexNode fetch) { + return new ImpalaSortRel(getCluster(), traitSet, newInput, newCollation, + offset, fetch); + } + + @Override + public NodeWithExprs getPlanNode(ParentPlanRelContext context) throws ImpalaException { + + if (limit_ == 0) { + return NodeCreationUtils.createEmptySetPlanNode( + context.ctx_.getNextNodeId(), context.ctx_.getRootAnalyzer(), getRowType()); + } + + List<RelFieldCollation> fieldCollations = getCollation().getFieldCollations(); + + NodeWithExprs inputNodeWithExprs = getChildPlanNode(context); + + // If there's limit without order-by, we don't need to generate + // a sort or top-n node..just set the limit on the child + if (fieldCollations.size() == 0) { + validateUnorderedLimit(context.filterCondition_, limit_, offset_); + // Mutating an existing object here. Either we should pass in a context into + // all PlanNodes containing the limit so the PlanNode constructor can set the + // limit, or leave the code here to mutate. + inputNodeWithExprs.planNode_.setLimit(limit_); + return inputNodeWithExprs; + } + + List<Expr> inputExprs = inputNodeWithExprs.outputExprs_; + + List<Boolean> isAscOrder = fieldCollations.stream() + .map(t -> (t.direction == RelFieldCollation.Direction.ASCENDING)) + .collect(Collectors.toList()); + List<Boolean> nullsFirstParams = fieldCollations.stream() + .map(t -> (t.nullDirection == RelFieldCollation.NullDirection.FIRST)) + .collect(Collectors.toList()); + List<Expr> sortExprs = fieldCollations.stream() + .map(t -> getExpr(inputExprs, t.getFieldIndex())) + .collect(Collectors.toList()); + + SortInfo sortInfo = new SortInfo(sortExprs, isAscOrder, nullsFirstParams); + sortInfo.createSortTupleInfo(inputNodeWithExprs.outputExprs_, + context.ctx_.getRootAnalyzer()); + + // createOutputExprs also makes the slot materialized, so call this before calling + // materializeRequiredSlots because the latter checks for the isMaterialized flag + List<Expr> outputExprs = createOutputExprs(sortInfo, inputNodeWithExprs.outputExprs_, + context.ctx_.getRootAnalyzer()); + + sortInfo.materializeRequiredSlots(context.ctx_.getRootAnalyzer(), + new ExprSubstitutionMap()); + + // Call a specific implementation of createSortNode(). In the future, we could + // try to leverage Impala's SingleNodePlanner.createSortNode() + SortNode sortNode = createSortNode(context.ctx_, + inputNodeWithExprs.planNode_, sortInfo, limit_, offset_, limit_ != -1, + context.ctx_.getRootAnalyzer()); + + NodeWithExprs retNode = new NodeWithExprs(sortNode, outputExprs); + + // If there is a filter condition, a SelectNode will get added on top + // of the retNode. + return NodeCreationUtils.wrapInSelectNodeIfNeeded(context, retNode, + getCluster().getRexBuilder()); + } + + private NodeWithExprs getChildPlanNode(ParentPlanRelContext context + ) throws ImpalaException { + ImpalaPlanRel relInput = (ImpalaPlanRel) getInput(0); + ParentPlanRelContext.Builder builder = + new ParentPlanRelContext.Builder(context, this); + builder.setFilterCondition(null); + return relInput.getPlanNode(builder.build()); + } + + private Expr getExpr(List<Expr> exprs, int index) { + return exprs.get(index); + } + + private void validateUnorderedLimit(RexNode filterCondition, long limit, long offset) { + // The filter should have been pushed through by the optimizer. This is verified + // here because there is no code to support pushing the filter condition if it + // hits this code. + Preconditions.checkArgument(filterCondition == null); + Preconditions.checkArgument(limit_ > 0); + Preconditions.checkArgument(offset_ == 0); + } + + /** + * Create the output expressions for the SortRel. + * Impala can change the order in their SortInfo object. So the SlotDescriptors + * do not necessarily line up with the indexes. So we need to walk through the + * expressions of the input node and match them up with the corresponding SlotRef. + */ + public List<Expr> createOutputExprs(SortInfo sortInfo, + List<Expr> outputExprs, Analyzer analyzer) throws AnalysisException { + ImmutableList.Builder<Expr> builder = new ImmutableList.Builder(); + + // project all columns coming from the child since Sort does not change + // any projections but do the mapping based on its own substitution map + for (Expr outputExpr : outputExprs) { + builder.add(outputExpr.trySubstitute(sortInfo.getOutputSmap(), + analyzer, true)); + } + + for (SlotDescriptor slotDesc : sortInfo.getSortTupleDescriptor().getSlots()) { + slotDesc.setIsMaterialized(true); + } + + return builder.build(); + } + + + /** + * Creates and initializes either a SortNode or a TopNNode depending on various + * heuristics and configuration parameters. + */ + + public static SortNode createSortNode(PlannerContext planCtx, PlanNode root, + SortInfo sortInfo, long limit, long offset, boolean hasLimit, + Analyzer analyzer) throws ImpalaException { + SortNode sortNode = createSortNode(planCtx, root, sortInfo, limit, offset, hasLimit); + Preconditions.checkState(sortNode.hasValidStats()); + sortNode.setLimit(limit); + sortNode.init(analyzer); + return sortNode; + } + + /** + * Creates and initializes either a SortNode or a TopNNode depending on various + * heuristics and configuration parameters. + */ + public static SortNode createSortNode(PlannerContext planCtx, PlanNode root, + SortInfo sortInfo, long limit, long offset, boolean hasLimit + ) throws ImpalaException { + + if (!hasLimit) { + return SortNode.createTotalSortNode(planCtx.getNextNodeId(), root, sortInfo, + offset); + } + + return SortNode.createTopNSortNode(planCtx.getQueryOptions(), planCtx.getNextNodeId(), + root, sortInfo, offset, limit, false); + } + + @Override + public RelNodeType relNodeType() { + return RelNodeType.SORT; + } +} diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java index b194ecae9..e34612980 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java @@ -57,6 +57,7 @@ public class CalciteOptimizer implements CompilerStep { ImmutableList.of( new ConvertToImpalaRelRules.ImpalaScanRule(), new ConvertToImpalaRelRules.ImpalaFilterRule(), + new ConvertToImpalaRelRules.ImpalaSortRule(), new ConvertToImpalaRelRules.ImpalaValuesRule(), new ConvertToImpalaRelRules.ImpalaUnionRule(), new ConvertToImpalaRelRules.ImpalaProjectRule())); diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test b/testdata/workloads/functional-query/queries/QueryTest/calcite.test index 240ac532d..0e31bc0ef 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test +++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test @@ -203,3 +203,118 @@ select * from (values (1)) union (values (2), (3)); # test suite which will have this problem. int ==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id > 3 order by abs(bigint_col), id; +---- RESULTS +4,0 +6,0 +5,10 +7,10 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id >= 3 order by abs(bigint_col), id; +---- RESULTS +4,0 +6,0 +3,10 +5,10 +7,10 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id < 3 order by abs(bigint_col), id; +---- RESULTS +0,0 +2,0 +1,10 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id <= 3 order by abs(bigint_col), id; +---- RESULTS +0,0 +2,0 +1,10 +3,10 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id != 3 order by abs(bigint_col), id; +---- RESULTS +0,0 +2,0 +4,0 +6,0 +1,10 +5,10 +7,10 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select id, abs(bigint_col) from functional.alltypestiny where id != 3 order by abs(bigint_col) desc, id; +---- RESULTS +1,10 +5,10 +7,10 +0,0 +2,0 +4,0 +6,0 +---- TYPES +int, bigint +==== +---- QUERY +# sort test +select group_str, some_nulls from functional.nullrows where group_str = 'a' order by some_nulls nulls first; +---- RESULTS +'a','NULL' +'a','NULL' +'a','NULL' +'a','NULL' +'a','a' +---- TYPES +string, string +==== +---- QUERY +# sort test +select group_str, some_nulls from functional.nullrows where group_str = 'a' order by some_nulls nulls last; +---- RESULTS +'a','a' +'a','NULL' +'a','NULL' +'a','NULL' +'a','NULL' +---- TYPES +string, string +==== +---- QUERY +# limit test +select bigint_col from functional.alltypestiny where bigint_col = 0 limit 2; +---- RESULTS +0 +0 +---- TYPES +bigint +==== +---- QUERY +# limit test +select id, abs(bigint_col) from functional.alltypestiny where id > 2 order by abs(bigint_col), id limit 3; +---- RESULTS +4,0 +6,0 +3,10 +---- TYPES +int, bigint +====
