[ https://issues.apache.org/jira/browse/HIVE-26006?focusedWorklogId=764034&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-764034 ]
ASF GitHub Bot logged work on HIVE-26006: ----------------------------------------- Author: ASF GitHub Bot Created on: 29/Apr/22 07:17 Start Date: 29/Apr/22 07:17 Worklog Time Spent: 10m Work Description: kasakrisz commented on code in PR #3082: URL: https://github.com/apache/hive/pull/3082#discussion_r861515205 ########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java: ########## @@ -244,13 +223,35 @@ private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws Semant reduceSinkDesc.getColumnExprMap(), reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder()); + + pushDownThrough(commonKeyPrefix, topNKey, join, reduceSinkOperator); + } + + private <T extends AbstractOperatorDesc> void pushDownThrough( + CommonKeyPrefix commonKeyPrefix, TopNKeyOperator topNKey, Operator<T> operator) + throws SemanticException { + + pushDownThrough(commonKeyPrefix, topNKey, operator, operator); + } + + private <TDesc extends AbstractOperatorDesc, TParentDesc extends AbstractOperatorDesc> void pushDownThrough( + CommonKeyPrefix commonKeyPrefix, TopNKeyOperator topNKey, + Operator<TDesc> join, Operator<TParentDesc> reduceSinkOperator) + throws SemanticException { + + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); if (commonKeyPrefix.isEmpty() || commonKeyPrefix.size() == topNKeyDesc.getPartitionKeyColumns().size()) { return; } + final TopNKeyDesc newTopNKeyDesc = topNKeyDesc.combine(commonKeyPrefix); + if (newTopNKeyDesc.getKeyColumns().size() > 0 && + newTopNKeyDesc.getKeyColumns().size() <= newTopNKeyDesc.getPartitionKeyColumns().size()) { Review Comment: The new `TopNKeyDesc` is needed for the new TNK operator, which is going to be the pushed-through version of the original TNK. The new and the original versions may have different keys, which is why the original cannot be moved. Added more comments. 
Yes, the test case I just added with this patch: `ptf_tnk.q` Issue Time Tracking ------------------- Worklog Id: (was: 764034) Time Spent: 40m (was: 0.5h) > TopNKey and PTF with more than one column is failing with IOBE > -------------------------------------------------------------- > > Key: HIVE-26006 > URL: https://issues.apache.org/jira/browse/HIVE-26006 > Project: Hive > Issue Type: Bug > Reporter: Naresh P R > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > > {code:java} > java.lang.IndexOutOfBoundsException: toIndex = 2 > at java.util.ArrayList.subListRangeCheck(ArrayList.java:1014) > at java.util.ArrayList.subList(ArrayList.java:1006) > at org.apache.hadoop.hive.ql.plan.TopNKeyDesc.combine(TopNKeyDesc.java:201) > at > org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdownThroughGroupBy(TopNKeyPushdownProcessor.java:162) > at > org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdown(TopNKeyPushdownProcessor.java:76) > at > org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.process(TopNKeyPushdownProcessor.java:57) > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120) > at > org.apache.hadoop.hive.ql.parse.TezCompiler.runTopNKeyOptimization(TezCompiler.java:1305) > at > org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:173) > at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:159) > at > 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12646) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:283) > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:219) > at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:103) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:215){code} -- This message was sent by Atlassian Jira (v8.20.7#820007)