[ https://issues.apache.org/jira/browse/HIVE-26524?focusedWorklogId=810902&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-810902 ]
ASF GitHub Bot logged work on HIVE-26524: ----------------------------------------- Author: ASF GitHub Bot Created on: 21/Sep/22 17:12 Start Date: 21/Sep/22 17:12 Worklog Time Spent: 10m Work Description: asolimando commented on code in PR #3588: URL: https://github.com/apache/hive/pull/3588#discussion_r976632375 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java: ########## @@ -308,17 +390,26 @@ private void convertOrderLimitToASTNode(HiveSortLimit hiveSortLimit) { private void convertSortToASTNode(HiveSortExchange hiveSortExchange) { List<RelFieldCollation> fieldCollations = hiveSortExchange.getCollation().getFieldCollations(); - convertFieldCollationsToASTNode(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations, + convertFieldCollationsAndSetOrderAST(hiveSortExchange, new Schema(hiveSortExchange), fieldCollations, null, HiveParser.TOK_SORTBY, "TOK_SORTBY"); } - private void convertFieldCollationsToASTNode( + private void convertFieldCollationsAndSetOrderAST( RelNode node, Schema schema, List<RelFieldCollation> fieldCollations, Map<Integer, RexNode> obRefToCallMap, int astToken, String astText) { + if (fieldCollations.isEmpty()) { return; } + hiveAST.order = convertFieldCollationsToASTNode( + node.getCluster().getRexBuilder(), schema, fieldCollations, obRefToCallMap, astToken, astText); + } + + public static ASTNode convertFieldCollationsToASTNode( Review Comment: Does it have to be `public`? 
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java: ########## @@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project project, String sketchClass) @Override boolean isApplicable(AggregateCall aggCall) { - if ((aggInput instanceof Project) - && !aggCall.isDistinct() && aggCall.getArgList().size() == 4 + if ((aggInput != null) Review Comment: We don't need the parentheses anymore: ```suggestion if (aggInput != null ``` ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. 
+ */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... 
null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if + // emp is empty + List<RexNode> projects = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + // left + addNullLiterals(rexBuilder, empty, projects, columnNames); + // right + copyProjects(rexBuilder, right.getRowType(), + join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames); + + RelNode project = call.builder().push(right).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + private static void addNullLiterals( + RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) { + for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) { + RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i); + RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType()); + projectFields.add(nullLiteral); + newColumnNames.add(empty.getRowType().getFieldList().get(i).getName()); + } + } + + public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType, Review Comment: I guess this method can be `private`. If not, please add some Javadoc to it. 
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import 
org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. + */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... 
null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if + // emp is empty + List<RexNode> projects = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + // left + addNullLiterals(rexBuilder, empty, projects, columnNames); + // right + copyProjects(rexBuilder, right.getRowType(), + join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames); + + RelNode project = call.builder().push(right).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + private static void addNullLiterals( + RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) { + for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) { + RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i); + RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType()); + projectFields.add(nullLiteral); + newColumnNames.add(empty.getRowType().getFieldList().get(i).getName()); + } + } + + public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType, + RelDataType castRowType, int castRowTypeOffset, + List<RexNode> outProjects, List<String> outProjectNames) { + + for (int i = 0; i < inRowType.getFieldCount(); ++i) { + RelDataTypeField relDataTypeField = 
inRowType.getFieldList().get(i); + RexInputRef inputRef = rexBuilder.makeInputRef(relDataTypeField.getType(), i); + RexNode cast = rexBuilder.makeCast( + castRowType.getFieldList().get(castRowTypeOffset + i).getType(), inputRef); + outProjects.add(cast); + outProjectNames.add(relDataTypeField.getName()); + } + } + + public static final RelOptRule JOIN_RIGHT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(RelNode.class).anyInputs(), + b2 -> b2.operand(Values.class).predicate(Values::isEmpty) + .noInputs())) + .withDescription("HivePruneEmptyJoin(right)") + .as(JoinRightEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinRightEmptyRuleConfig}. + * In case of left outer join if the right branch is empty the join operator can be removed + * and take the left branch only. + * + * select * from emp right join (select * from dept where 1=0) + * to + * select emp.*, null as dept.col0 ... 
null as dept.coln from emp + */ + public interface JoinRightEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + RelNode left = call.rel(1); + HiveValues empty = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnRight()) { + // "select * from emp left join dept" is not necessarily empty if + // dept is empty + List<RexNode> projects = new ArrayList<>( + left.getRowType().getFieldCount() + empty.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + left.getRowType().getFieldCount() + empty.getRowType().getFieldCount()); + // left + copyProjects(rexBuilder, left.getRowType(), join.getRowType(), 0, projects, columnNames); + // right + addNullLiterals(rexBuilder, empty, projects, columnNames); + + RelNode project = call.builder().push(left).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + if (join.getJoinType() == JoinRelType.ANTI) { + // In case of anti join: Join(X, Empty, ANTI) becomes X + call.transformTo(join.getLeft()); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + public static final RelOptRule SORT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptySort") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Sort.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule SORT_FETCH_ZERO_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b -> + b.operand(Sort.class).anyInputs()) + .withDescription("HivePruneSortLimit0") + .as(PruneEmptyRules.SortFetchZeroRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule AGGREGATE_INSTANCE = + RelRule.Config.EMPTY + 
.withDescription("HivePruneEmptyAggregate") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Aggregate.class, Aggregate::isNotGrandTotal) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule UNION_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Union.class).unorderedInputs(b1 -> + b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs())) + .withDescription("HivePruneEmptyUnionBranch") + .as(HiveUnionEmptyPruneRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Copy of {@link PruneEmptyRules.UnionEmptyPruneRuleConfig} but this version expects {@link Union}. + */ + public interface HiveUnionEmptyPruneRuleConfig extends PruneEmptyRules.PruneEmptyRule.Config { + @Override default PruneEmptyRules.PruneEmptyRule toRule() { + return new PruneEmptyRules.PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + final Union union = call.rel(0); + final List<RelNode> inputs = union.getInputs(); + assert inputs != null; + final RelBuilder builder = call.builder(); + int nonEmptyInputs = 0; + for (RelNode input : inputs) { + if (!isEmpty(input)) { + builder.push(input); + nonEmptyInputs++; + } + } + assert nonEmptyInputs < inputs.size() + : "planner promised us at least one Empty child: " + + RelOptUtil.toString(union); + if (nonEmptyInputs == 0) { + builder.push(union).empty(); + } else { + builder.union(union.all, nonEmptyInputs); + builder.convert(union.getRowType(), true); + } + call.transformTo(builder.build()); + } + }; + } + } + + private static boolean isEmpty(RelNode node) { + if (node instanceof Values) { + return ((Values) node).getTuples().isEmpty(); + } + if (node instanceof HepRelVertex) { + return isEmpty(((HepRelVertex) node).getCurrentRel()); + } + // Note: relation input might be a RelSubset, so we just iterate over the relations + // in order to check if the subset is 
equivalent to an empty relation. + if (!(node instanceof RelSubset)) { + return false; + } + RelSubset subset = (RelSubset) node; + for (RelNode rel : subset.getRels()) { + if (isEmpty(rel)) { + return true; + } + } + return false; + } Review Comment: Minor: What if we got rid of the negated condition in this way? ```suggestion if (node instanceof RelSubset) { RelSubset subset = (RelSubset) node; for (RelNode rel : subset.getRels()) { if (isEmpty(rel)) { return true; } } } return false; } ``` ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveValues.java: ########## @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexLiteral; + +import java.util.List; + +/** + * Subclass of {@link org.apache.calcite.rel.core.Values}. + * Specialized to Hive engine. 
+ */ +public class HiveValues extends Values { + + public HiveValues( + RelOptCluster cluster, + RelDataType rowType, + ImmutableList<ImmutableList<RexLiteral>> tuples, + RelTraitSet traits) { Review Comment: Minor: I don't think we align parameters vertically in Hive in general; we could also try to fit them on a single line, like: ```suggestion public HiveValues( RelOptCluster cluster, RelDataType rowType, ImmutableList<ImmutableList<RexLiteral>> tuples, RelTraitSet traits) { ``` ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRewriteToDataSketchesRules.java: ########## @@ -336,16 +336,13 @@ protected VBuilderPAP(Aggregate aggregate, Project project, String sketchClass) @Override boolean isApplicable(AggregateCall aggCall) { - if ((aggInput instanceof Project) - && !aggCall.isDistinct() && aggCall.getArgList().size() == 4 + if ((aggInput != null) + && !aggCall.isDistinct() && aggCall.getArgList().size() == 1 Review Comment: I am having a hard time understanding this change; do you have a high-level explanation that might help? Since others down the line might be puzzled, we could probably add a few comments here. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java: ########## @@ -717,9 +719,12 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se final RexNode filterArg = aggCall.filterArg < 0 ? null : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg)); RelBuilder.AggCall newAggCall = - relBuilder.aggregateCall(aggCall.getAggregation(), - aggCall.isDistinct(), aggCall.isApproximate(), - filterArg, aggCall.name, args); + relBuilder.aggregateCall(aggCall.getAggregation(), args) + .distinct(aggCall.isDistinct()) + .filter(filterArg) + .approximate(aggCall.isApproximate()) + .sort(relBuilder.fields(aggCall.collation)) + .as(aggCall.name); Review Comment: Can you give a high-level overview of the change here? 
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import 
org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. + */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... 
null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if + // emp is empty + List<RexNode> projects = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + // left + addNullLiterals(rexBuilder, empty, projects, columnNames); + // right + copyProjects(rexBuilder, right.getRowType(), + join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames); + + RelNode project = call.builder().push(right).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + private static void addNullLiterals( + RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) { + for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) { + RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i); + RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType()); + projectFields.add(nullLiteral); + newColumnNames.add(empty.getRowType().getFieldList().get(i).getName()); + } + } + + public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType, + RelDataType castRowType, int castRowTypeOffset, + List<RexNode> outProjects, List<String> outProjectNames) { + + for (int i = 0; i < inRowType.getFieldCount(); ++i) { + RelDataTypeField relDataTypeField = 
inRowType.getFieldList().get(i); + RexInputRef inputRef = rexBuilder.makeInputRef(relDataTypeField.getType(), i); + RexNode cast = rexBuilder.makeCast( + castRowType.getFieldList().get(castRowTypeOffset + i).getType(), inputRef); Review Comment: Since those two methods are used for both left and right join rules, I'd rather place them before the two rules or after them, not in the middle. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java: ########## @@ -121,7 +127,83 @@ public static ASTNode convert(final RelNode relNode, List<FieldSchema> resultSch return c.convert(); } + // TOK_QUERY + // TOK_INSERT + // TOK_DESTINATION + // TOK_DIR + // TOK_TMP_FILE + // TOK_SELECT + // TOK_SELEXPR + // TOK_FUNCTION + // TOK_<type> + // TOK_NULL + // alias0 + // ... + // TOK_SELEXPR + // TOK_FUNCTION + // TOK_<type> + // TOK_NULL + // aliasn + // TOK_LIMIT + // 0 + // 0 + private static ASTNode emptyPlan(RelDataType dataType) { Review Comment: As discussed off-line, can we have a unit test covering the translation (especially the typed null bit)? I am positive that the method is doing what it should, I am more worried about future regressions if it gets touched. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/opconventer/HiveValuesVisitor.java: ########## @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer; + +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr; +import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.LimitDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +class HiveValuesVisitor extends HiveRelNodeVisitor<HiveValues> { + HiveValuesVisitor(HiveOpConverter hiveOpConverter) { + super(hiveOpConverter); + } + + @Override + OpAttr visit(HiveValues valuesRel) throws SemanticException { + + 
LOG.debug("Translating operator rel#{}:{} with row type: [{}]", + valuesRel.getId(), valuesRel.getRelTypeName(), valuesRel.getRowType()); + LOG.debug("Operator rel#{}:{} has {} tuples.", + valuesRel.getId(), valuesRel.getRelTypeName(), valuesRel.tuples.size()); + + if (!Values.isEmpty(valuesRel)) { + LOG.error("Empty {} operator translation not supported yet in return path.", + valuesRel.getClass().getCanonicalName()); + return null; + } + + // 1. collect columns for project row schema + List<String> columnNames = new ArrayList<>(); + List<ExprNodeDesc> exprNodeDescList = new ArrayList<>(); + Map<String, ExprNodeDesc> colExprMap = new HashMap<>(); + + ArrayList<ColumnInfo> colInfoList = new ArrayList<>(); Review Comment: Nitpick, prefer the interface if possible: ```suggestion List<ColumnInfo> colInfoList = new ArrayList<>(); ``` ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. 
+ */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... 
null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if Review Comment: Can you update this following Ruben's suggestion from the Calcite PR, both here and in the corresponding "right" version of the rule? Issue Time Tracking ------------------- Worklog Id: (was: 810902) Time Spent: 1.5h (was: 1h 20m) > Use Calcite to remove sections of a query plan known never produces rows > ------------------------------------------------------------------------ > > Key: HIVE-26524 > URL: https://issues.apache.org/jira/browse/HIVE-26524 > Project: Hive > Issue Type: Improvement > Components: CBO > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 1.5h > Remaining Estimate: 0h > > Calcite has a set of rules to remove sections of a query plan that are known to never > produce any rows. In some cases the whole plan can be removed. Such plans > are represented with a single {{Values}} operator with no tuples.
ex.: > {code:java} > select y + 1 from (select a1 y, b1 z from t1 where b1 > 10) q WHERE 1=0 > {code} > {code:java} > HiveValues(tuples=[[]]) > {code} > In other cases, when the plan has outer joins or set operators, some branches can be > replaced with empty values; moving forward, in some cases the join/set operator > can be removed entirely. > {code:java} > select a2, b2 from t2 where 1=0 > union > select a1, b1 from t1 > {code} > {code:java} > HiveAggregate(group=[{0, 1}]) > HiveTableScan(table=[[default, t1]], table:alias=[t1]) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)