asolimando commented on code in PR #5249: URL: https://github.com/apache/hive/pull/5249#discussion_r1600252878
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RemoveUnusedCteRule.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.TableScan; + +import java.util.List; +import java.util.Map; + +/** + * Rule that removes unused CTEs from the plan by expanding the respective TableScans. 
+ * <p>The rule assumes that all materializations registered in the planner refer to CTEs.</p> + */ +public class RemoveUnusedCteRule extends RelOptRule { Review Comment: Nit: "Unused" is a bit misleading, as it usually means used 0 times. I don't have a better name right away, but it might be worth thinking about it. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePartitionPruneRule.java: ########## @@ -50,6 +50,9 @@ protected void perform(RelOptRuleCall call, Filter filter, HiveTableScan tScan) { // Original table RelOptHiveTable hiveTable = (RelOptHiveTable) tScan.getTable(); + if (hiveTable.getHiveTableMD().isTemporary()) { Review Comment: Maybe a comment could help people down the line: it's clear in the PR that we are introducing temporary tables for CTEs, but it might be confusing when looking at the rule in isolation. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RemoveUnusedCteRule.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.TableScan; + +import java.util.List; +import java.util.Map; + +/** + * Rule that removes unused CTEs from the plan by expanding the respective TableScans. + * <p>The rule assumes that all materializations registered in the planner refer to CTEs.</p> + */ +public class RemoveUnusedCteRule extends RelOptRule { + + private final int referenceThreshold; + private final Map<List<String>, Long> tableOccurrences; + + public RemoveUnusedCteRule(Map<List<String>, Long> tableOccurrences, int referenceThreshold) { Review Comment: Similarly, if the occurrences need a `Long`, `referenceThreshold` needs `long` type too. ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java: ########## @@ -114,25 +115,31 @@ public class ASTConverter { private Filter having; private RelNode select; private RelNode orderLimit; + private List<ASTNode> ctes; private Schema schema; private long derivedTableCount; private PlanMapper planMapper; - ASTConverter(RelNode root, long dtCounterInitVal, PlanMapper planMapper) { + ASTConverter(RelNode root, long dtCounterInitVal, PlanMapper planMapper, List<ASTNode> ctes) { this.root = root; hiveAST = new HiveAST(); this.derivedTableCount = dtCounterInitVal; this.planMapper = planMapper; + this.ctes = ctes; } public static ASTNode convert(final RelNode relNode, List<FieldSchema> resultSchema, boolean alignColumns, PlanMapper planMapper) throws CalciteSemanticException { RelNode root = PlanModifierForASTConv.convertOpTree(relNode, resultSchema, alignColumns); - ASTConverter c = new ASTConverter(root, 0, planMapper); - return c.convert(); + ASTConverter c = new ASTConverter(root, 0, planMapper, new ArrayList<>()); + ASTNode r = c.convert(); + for (ASTNode cte : c.ctes) { 
Review Comment: Not super familiar with this part of the codebase, but why do we need to manually add the ctes in this case? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RemoveUnusedCteRule.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptMaterialization; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.TableScan; + +import java.util.List; +import java.util.Map; + +/** + * Rule that removes unused CTEs from the plan by expanding the respective TableScans. + * <p>The rule assumes that all materializations registered in the planner refer to CTEs.</p> + */ +public class RemoveUnusedCteRule extends RelOptRule { + + private final int referenceThreshold; + private final Map<List<String>, Long> tableOccurrences; Review Comment: Nit: I think `Integer` is big enough to reference CTEs within a single query plan. 
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java: ########## @@ -77,9 +77,11 @@ public static ASTNode table(final RelNode scan) { assert hts != null; RelOptHiveTable hTbl = (RelOptHiveTable) hts.getTable(); - ASTBuilder tableNameBuilder = ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME") - .add(HiveParser.Identifier, hTbl.getHiveTableMD().getDbName()) - .add(HiveParser.Identifier, hTbl.getHiveTableMD().getTableName()); + ASTBuilder tableNameBuilder = ASTBuilder.construct(HiveParser.TOK_TABNAME, "TOK_TABNAME"); + if (!hTbl.getHiveTableMD().isTemporary()) { Review Comment: Nit: as discussed elsewhere, maybe a comment could be useful ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/CommonRelSubExprRegisterRule.java: ########## @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.CommonRelSubExprRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionRegistry; + +import com.google.common.collect.Multimap; + +import java.util.function.Predicate; + +/** + * Rule for saving relational expressions that appear more than once in a query tree to the planner context. + */ +public final class CommonRelSubExprRegisterRule extends CommonRelSubExprRule { + public static final CommonRelSubExprRegisterRule JOIN = new CommonRelSubExprRegisterRule(operand(Join.class, any())); + public static final CommonRelSubExprRegisterRule AGGREGATE = + new CommonRelSubExprRegisterRule(operand(Aggregate.class, any())); + public static final CommonRelSubExprRegisterRule FILTER = + new CommonRelSubExprRegisterRule(operand(Filter.class, any())); + public static final CommonRelSubExprRegisterRule PROJECT = + new CommonRelSubExprRegisterRule(operandJ(Project.class, null, new InterestingRelNodePredicate(), any())); + + private CommonRelSubExprRegisterRule(RelOptRuleOperand operand) { + super(operand); + } + + @Override + public void onMatch(final RelOptRuleCall call) { + CommonTableExpressionRegistry r = call.getPlanner().getContext().unwrap(CommonTableExpressionRegistry.class); + if (r != null) { Review Comment: In what cases is it expected to be `null`? ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/AggregatedColumns.java: ########## @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.Metadata; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.util.ImmutableBitSet; + +/** + * Metadata about whether a set of columns originates from aggregation functions. + */ +public interface AggregatedColumns extends Metadata { Review Comment: Question: don't we have anything similar in Calcite already? If not, is it too Hive-specific, or could we file a Jira to add it? 
########## ql/src/test/queries/clientpositive/cte_cbo_plan_json.q: ########## @@ -0,0 +1,16 @@ +CREATE TABLE emps +( + empid INTEGER, + deptno INTEGER, + name VARCHAR(10), + salary DECIMAL(8, 2) +); + +set hive.optimize.cte.materialize.threshold=1; +set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester; +set hive.optimize.cte.materialize.full.aggregate.only=false; + +EXPLAIN FORMATTED CBO +SELECT name FROM emps e WHERE salary > 50000 +UNION +SELECT name FROM emps e WHERE salary > 50000; Review Comment: Nit: missing newline at the end of the file -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
