[ https://issues.apache.org/jira/browse/HIVE-21365?focusedWorklogId=243741&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-243741 ]
ASF GitHub Bot logged work on HIVE-21365: ----------------------------------------- Author: ASF GitHub Bot Created on: 16/May/19 23:50 Start Date: 16/May/19 23:50 Worklog Time Spent: 10m Work Description: vineetgarg02 commented on pull request #630: HIVE-21365 URL: https://github.com/apache/hive/pull/630#discussion_r284928489 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ########## @@ -2150,75 +1966,69 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE); rules.add(HiveUnionPullUpConstantsRule.INSTANCE); rules.add(HiveAggregatePullUpConstantsRule.INSTANCE); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, - rules.toArray(new RelOptRule[rules.size()])); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); + generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP, + rules.toArray(new RelOptRule[rules.size()])); // 4. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. 
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable final float reductionProportion = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE); final long reductionTuples = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE, - HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE, - HiveSortUnionReduceRule.INSTANCE); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, + generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, + HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE, + HiveSortJoinReduceRule.INSTANCE, HiveSortUnionReduceRule.INSTANCE); + generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP, new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } - // 5. 
Push Down Semi Joins + // Push Down Semi Joins //TODO: Enable this later /*perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */ - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, - HiveSortLimitRemoveRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Trying to remove Limit and Order by"); + // 5. Try to remove limit and order by + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, + HiveSortLimitRemoveRule.INSTANCE); // 6. Apply Partition Pruning - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Partition Pruning"); + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, + new HivePartitionPruneRule(conf)); // 7. 
Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(cluster, null), - profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)); - basePlan = fieldTrimmer.trim(basePlan); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Projection Pruning"); + generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN, + new HiveFieldTrimmerRule(profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING))); // 8. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS // Also merge, remove and reduce Project if possible - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, + generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, HiveProjectFilterPullUpConstantsRule.INSTANCE, HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); + + // 9. Get rid of sq_count_check if group by key is constant + if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { Review comment: If I remember correctly, there was a reason this was moved to the beginning. I guess that reason is no longer valid if you don't see any test change. But if you do see a change in the plan, e.g. for subqueries, we will probably need to move it back. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 243741) Time Spent: 0.5h (was: 20m) > Refactor Hep planner steps in CBO > --------------------------------- > > Key: HIVE-21365 > URL: https://issues.apache.org/jira/browse/HIVE-21365 > Project: Hive > Issue Type: Improvement > Components: CBO > Reporter: Jesus Camacho Rodriguez > Assignee: Jesus Camacho Rodriguez > Priority: Major > Labels: pull-request-available > Attachments: HIVE-21365.01.patch, HIVE-21365.01.patch, > HIVE-21365.02.patch, HIVE-21365.03.patch, HIVE-21365.03.patch, > HIVE-21365.04.patch, HIVE-21365.patch > > Time Spent: 0.5h > Remaining Estimate: 0h > > Using subprograms to decrease number of planner instantiations and benefit > fully from metadata providers caching, among other benefits. -- This message was sent by Atlassian JIRA (v7.6.3#76005)