[ https://issues.apache.org/jira/browse/HIVE-26524?focusedWorklogId=811180&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-811180 ]
ASF GitHub Bot logged work on HIVE-26524: ----------------------------------------- Author: ASF GitHub Bot Created on: 22/Sep/22 11:50 Start Date: 22/Sep/22 11:50 Worklog Time Spent: 10m Work Description: kasakrisz commented on code in PR #3588: URL: https://github.com/apache/hive/pull/3588#discussion_r977557947 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java: ########## @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.rules.PruneEmptyRules; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides access to Calcite's {@link PruneEmptyRules}. + * The instances of the rules use {@link org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}. 
+ */ +public class HiveRemoveEmptySingleRules extends PruneEmptyRules { + + public static final RelOptRule PROJECT_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyProject") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Project.class, project -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule FILTER_INSTANCE = + RelRule.Config.EMPTY + .withDescription("HivePruneEmptyFilter") + .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class) + .withOperandFor(Filter.class, singleRel -> true) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + public static final RelOptRule JOIN_LEFT_INSTANCE = + RelRule.Config.EMPTY + .withOperandSupplier(b0 -> + b0.operand(Join.class).inputs( + b1 -> b1.operand(Values.class) + .predicate(Values::isEmpty).noInputs(), + b2 -> b2.operand(RelNode.class).anyInputs())) + .withDescription("HivePruneEmptyJoin(left)") + .as(JoinLeftEmptyRuleConfig.class) + .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER) + .toRule(); + + /** + * Improved version of Calcite's {@link PruneEmptyRules.JoinLeftEmptyRuleConfig}. + * In case of right outer join if the left branch is empty the join operator can be removed + * and take the right branch only. + * + * select * from (select * from emp where 1=0) right join dept + * to + * select null as emp.col0 ... 
null as emp.coln, dept.* from dept + */ + public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config { + @Override default PruneEmptyRule toRule() { + return new PruneEmptyRule(this) { + @Override public void onMatch(RelOptRuleCall call) { + Join join = call.rel(0); + HiveValues empty = call.rel(1); + RelNode right = call.rel(2); + RexBuilder rexBuilder = call.builder().getRexBuilder(); + if (join.getJoinType().generatesNullsOnLeft()) { + // "select * from emp right join dept" is not necessarily empty if + // emp is empty + List<RexNode> projects = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + List<String> columnNames = new ArrayList<>( + empty.getRowType().getFieldCount() + right.getRowType().getFieldCount()); + // left + addNullLiterals(rexBuilder, empty, projects, columnNames); + // right + copyProjects(rexBuilder, right.getRowType(), + join.getRowType(), empty.getRowType().getFieldCount(), projects, columnNames); + + RelNode project = call.builder().push(right).project(projects, columnNames).build(); + call.transformTo(project); + return; + } + call.transformTo(call.builder().push(join).empty().build()); + } + }; + } + } + + private static void addNullLiterals( + RexBuilder rexBuilder, HiveValues empty, List<RexNode> projectFields, List<String> newColumnNames) { + for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) { + RelDataTypeField relDataTypeField = empty.getRowType().getFieldList().get(i); + RexNode nullLiteral = rexBuilder.makeNullLiteral(relDataTypeField.getType()); + projectFields.add(nullLiteral); + newColumnNames.add(empty.getRowType().getFieldList().get(i).getName()); + } + } + + public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType, Review Comment: Yes it can be and changed it. 
Issue Time Tracking ------------------- Worklog Id: (was: 811180) Time Spent: 2h (was: 1h 50m) > Use Calcite to remove sections of a query plan known never produces rows > ------------------------------------------------------------------------ > > Key: HIVE-26524 > URL: https://issues.apache.org/jira/browse/HIVE-26524 > Project: Hive > Issue Type: Improvement > Components: CBO > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 2h > Remaining Estimate: 0h > > Calcite has a set of rules to remove sections of a query plan that are known to never > produce any rows. In some cases the whole plan can be removed. Such plans > are represented with a single {{Values}} operator with no tuples, e.g.: > {code:java} > select y + 1 from (select a1 y, b1 z from t1 where b1 > 10) q WHERE 1=0 > {code} > {code:java} > HiveValues(tuples=[[]]) > {code} > In other cases, when the plan has outer join or set operators, some branches can be > replaced with empty values; going further, in some cases the join/set operator > can be removed entirely: > {code:java} > select a2, b2 from t2 where 1=0 > union > select a1, b1 from t1 > {code} > {code:java} > HiveAggregate(group=[{0, 1}]) > HiveTableScan(table=[[default, t1]], table:alias=[t1]) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)