[ https://issues.apache.org/jira/browse/HIVE-26524?focusedWorklogId=811183&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-811183 ]
ASF GitHub Bot logged work on HIVE-26524: ----------------------------------------- Author: ASF GitHub Bot Created on: 22/Sep/22 11:54 Start Date: 22/Sep/22 11:54 Worklog Time Spent: 10m Work Description: kasakrisz commented on code in PR #3588: URL: https://github.com/apache/hive/pull/3588#discussion_r977561476 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. + */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if + // emp is empty + List<RexNode> projects = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + // left + addNullLiterals(rexBuilder, empty, projects, columnNames); + // right + copyProjects(rexBuilder, right.getRowType(), + join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames); + + RelNode project = call.builder().push(right).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + private static void addNullLiterals( + RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) { + for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) { + RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i); + RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType()); + projectFields.add(nullLiteral); + newColumnNames.add(empty.getRowType().getFieldList().get(i).getName()); + } + } + + public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType, + RelDataType castRowType, int castRowTypeOffset, + List<RexNode> outProjects, List<String> outProjectNames) { + + for (int i = 0; i < inRowType.getFieldCount(); ++i) { + RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i); + RexInputRef inputRef = rexBuilder.makeInputRef(relDataTypeField.getType(), i); + RexNode cast = rexBuilder.makeCast( + castRowType.getFieldList().get(castRowTypeOffset + i).getType(), inputRef); + outProjects.add(cast); + outProjectNames.add(relDataTypeField.getName()); + } + } + + public static final RelOptRule JOIN_RIGHT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(RelNode.class).anyInputs(), + b2 -> b2.operand(Values.class).predicate(Values::isEmpty) + .noInputs())) + .withDescription("HivePruneEmptyJoin(right)") + .as(JoinRightEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinRightEmptyRuleConfig}. + * In case of left outer join if the right branch is empty the join operator can be removed + * and take the left branch only. + * + * select * from emp right join (select * from dept where 1=0) + * to + * select emp.*, null as dept.col0 ... null as dept.coln from emp + */ + public interface JoinRightEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + RelNode left = call.rel(1); + HiveValues empty = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnRight()) { + // "select * from emp left join dept" is not necessarily empty if + // dept is empty + List<RexNode> projects = new ArrayList<>( + left.getRowType().getFieldCount() + empty.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + left.getRowType().getFieldCount() + empty.getRowType().getFieldCount()); + // left + copyProjects(rexBuilder, left.getRowType(), join.getRowType(), 0, projects, columnNames); + // right + addNullLiterals(rexBuilder, empty, projects, columnNames); + + RelNode project = call.builder().push(left).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + if (join.getJoinType() == JoinRelType.ANTI) { + // In case of anti join: Join(X, Empty, ANTI) becomes X + call.transformTo(join.getLeft()); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + public static final RelOptRule SORT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptySort") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Sort.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule SORT_FETCH_ZERO_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b -> + b.operand(Sort.class).anyInputs()) + .withDescription("HivePruneSortLimit0") + .as(PruneEmptyRules.SortFetchZeroRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule AGGREGATE_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyAggregate") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Aggregate.class, Aggregate::isNotGrandTotal) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule UNION_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Union.class).unorderedInputs(b1 -> + b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs())) + .withDescription("HivePruneEmptyUnionBranch") + .as(HiveUnionEmptyPruneRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Copy of {@link PruneEmptyRules.UnionEmptyPruneRuleConfig} but this version expects {@link Union}. + */ + public interface HiveUnionEmptyPruneRuleConfig extends PruneEmptyRules.PruneEmptyRule.Config { + @Override default PruneEmptyRules.PruneEmptyRule toRule() { + return new PruneEmptyRules.PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + final Union union = call.rel(0); + final List<RelNode> inputs = union.getInputs(); + assert inputs != null; + final RelBuilder builder = call.builder(); + int nonEmptyInputs = 0; + for (RelNode input : inputs) { + if (!isEmpty(input)) { + builder.push(input); + nonEmptyInputs++; + } + } + assert nonEmptyInputs < inputs.size() + : "planner promised us at least one Empty child: " + + RelOptUtil.toString(union); + if (nonEmptyInputs == 0) { + builder.push(union).empty(); + } else { + builder.union(union.all, nonEmptyInputs); + builder.convert(union.getRowType(), true); + } + call.transformTo(builder.build()); + } + }; + } + } + + private static boolean isEmpty(RelNode node) { + if (node instanceof Values) { + return ((Values) node).getTuples().isEmpty(); + } + if (node instanceof HepRelVertex) { + return isEmpty(((HepRelVertex) node).getCurrentRel()); + } + // Note: relation input might be a RelSubset, so we just iterate over the relations + // in order to check if the subset is equivalent to an empty relation. + if (!(node instanceof RelSubset)) { + return false; + } + RelSubset subset = (RelSubset) node; + for (RelNode rel : subset.getRels()) { + if (isEmpty(rel)) { + return true; + } + } + return false; + } Review Comment: This method is copied from Calcite and I don't want to do any change since eventually this will be removed when we upgrade the Calcite version. Btw I prefer the current code style: return from the method as early as possible otherwise large blocks must be indented and it can be hard to read the code if many blocks are nested. Issue Time Tracking ------------------- Worklog Id: (was: 811183) Time Spent: 2h 10m (was: 2h) > Use Calcite to remove sections of a query plan known never produces rows > ------------------------------------------------------------------------ > > Key: HIVE-26524 > URL: https://issues.apache.org/jira/browse/HIVE-26524 > Project: Hive > Issue Type: Improvement > Components: CBO > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 2h 10m > Remaining Estimate: 0h > > Calcite has a set of rules to remove sections of a query plan known never > produces any rows. In some cases the whole plan can be removed. Such plans > are represented with a single {{Values}} operators with no tuples. ex.: > {code:java} > select y + 1 from (select a1 y, b1 z from t1 where b1 > 10) q WHERE 1=0 > {code} > {code:java} > HiveValues(tuples=[[]]) > {code} > Other cases when plan has outer join or set operators some branches can be > replaced with empty values moving forward in some cases the join/set operator > can be removed > {code:java} > select a2, b2 from t2 where 1=0 > union > select a1, b1 from t1 > {code} > {code:java} > HiveAggregate(group=[{0, 1}]) > HiveTableScan(table=[[default, t1]], table:alias=[t1]) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)