[
https://issues.apache.org/jira/browse/HIVE-26524?focusedWorklogId=811179&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-811179
]
ASF GitHub Bot logged work on HIVE-26524:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 22/Sep/22 11:50
Start Date: 22/Sep/22 11:50
Worklog Time Spent: 10m
Work Description: kasakrisz commented on code in PR #3588:
URL: https://github.com/apache/hive/pull/3588#discussion_r977557467
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveEmptySingleRules.java:
##########
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rel.core.Values;
+import org.apache.calcite.rel.rules.PruneEmptyRules;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class provides access to Calcite's {@link PruneEmptyRules}.
+ * The instances of the rules use {@link
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelBuilder}.
+ */
+public class HiveRemoveEmptySingleRules extends PruneEmptyRules {
+
+ public static final RelOptRule PROJECT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyProject")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Project.class, project -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule FILTER_INSTANCE =
+ RelRule.Config.EMPTY
+ .withDescription("HivePruneEmptyFilter")
+ .as(PruneEmptyRules.RemoveEmptySingleRule.Config.class)
+ .withOperandFor(Filter.class, singleRel -> true)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ public static final RelOptRule JOIN_LEFT_INSTANCE =
+ RelRule.Config.EMPTY
+ .withOperandSupplier(b0 ->
+ b0.operand(Join.class).inputs(
+ b1 -> b1.operand(Values.class)
+
.predicate(Values::isEmpty).noInputs(),
+ b2 -> b2.operand(RelNode.class).anyInputs()))
+ .withDescription("HivePruneEmptyJoin(left)")
+ .as(JoinLeftEmptyRuleConfig.class)
+ .withRelBuilderFactory(HiveRelFactories.HIVE_BUILDER)
+ .toRule();
+
+ /**
+ * Improved version of Calcite's {@link
PruneEmptyRules.JoinLeftEmptyRuleConfig}.
+ * In case of right outer join if the left branch is empty the join operator
can be removed
+ * and take the right branch only.
+ *
+ * select * from (select * from emp where 1=0) right join dept
+ * to
+ * select null as emp.col0 ... null as emp.coln, dept.* from dept
+ */
+ public interface JoinLeftEmptyRuleConfig extends PruneEmptyRule.Config {
+ @Override default PruneEmptyRule toRule() {
+ return new PruneEmptyRule(this) {
+ @Override public void onMatch(RelOptRuleCall call) {
+ Join join = call.rel(0);
+ HiveValues empty = call.rel(1);
+ RelNode right = call.rel(2);
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ if (join.getJoinType().generatesNullsOnLeft()) {
+ // "select * from emp right join dept" is not necessarily empty if
+ // emp is empty
+ List<RexNode> projects = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ List<String> columnNames = new ArrayList<>(
+ empty.getRowType().getFieldCount() +
right.getRowType().getFieldCount());
+ // left
+ addNullLiterals(rexBuilder, empty, projects, columnNames);
+ // right
+ copyProjects(rexBuilder, right.getRowType(),
+ join.getRowType(), empty.getRowType().getFieldCount(),
projects, columnNames);
+
+ RelNode project = call.builder().push(right).project(projects,
columnNames).build();
+ call.transformTo(project);
+ return;
+ }
+ call.transformTo(call.builder().push(join).empty().build());
+ }
+ };
+ }
+ }
+
+ private static void addNullLiterals(
+ RexBuilder rexBuilder, HiveValues empty, List<RexNode>
projectFields, List<String> newColumnNames) {
+ for (int i = 0; i < empty.getRowType().getFieldList().size(); ++i) {
+ RelDataTypeField relDataTypeField =
empty.getRowType().getFieldList().get(i);
+ RexNode nullLiteral =
rexBuilder.makeNullLiteral(relDataTypeField.getType());
+ projectFields.add(nullLiteral);
+ newColumnNames.add(empty.getRowType().getFieldList().get(i).getName());
+ }
+ }
+
+ public static void copyProjects(RexBuilder rexBuilder, RelDataType inRowType,
+ RelDataType castRowType, int
castRowTypeOffset,
+ List<RexNode> outProjects, List<String>
outProjectNames) {
+
+ for (int i = 0; i < inRowType.getFieldCount(); ++i) {
+ RelDataTypeField relDataTypeField = inRowType.getFieldList().get(i);
+ RexInputRef inputRef =
rexBuilder.makeInputRef(relDataTypeField.getType(), i);
+ RexNode cast = rexBuilder.makeCast(
+ castRowType.getFieldList().get(castRowTypeOffset + i).getType(),
inputRef);
Review Comment:
Moved to after the rule configs that handle joins.
Issue Time Tracking
-------------------
Worklog Id: (was: 811179)
Time Spent: 1h 50m (was: 1h 40m)
> Use Calcite to remove sections of a query plan known never produces rows
> ------------------------------------------------------------------------
>
> Key: HIVE-26524
> URL: https://issues.apache.org/jira/browse/HIVE-26524
> Project: Hive
> Issue Type: Improvement
> Components: CBO
> Reporter: Krisztian Kasa
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h 50m
> Remaining Estimate: 0h
>
> Calcite has a set of rules to remove sections of a query plan that are known
> to never produce any rows. In some cases the whole plan can be removed. Such
> plans are represented with a single {{Values}} operator with no tuples, e.g.:
> {code:java}
> select y + 1 from (select a1 y, b1 z from t1 where b1 > 10) q WHERE 1=0
> {code}
> {code:java}
> HiveValues(tuples=[[]])
> {code}
> In other cases, when the plan has an outer join or set operators, some branches
> can be replaced with empty values; going further, in some cases the join/set
> operator itself can be removed:
> {code:java}
> select a2, b2 from t2 where 1=0
> union
> select a1, b1 from t1
> {code}
> {code:java}
> HiveAggregate(group=[{0, 1}])
> HiveTableScan(table=[[default, t1]], table:alias=[t1])
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)