[ https://issues.apache.org/jira/browse/HIVE-24231?focusedWorklogId=499942&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-499942 ]
ASF GitHub Bot logged work on HIVE-24231: ----------------------------------------- Author: ASF GitHub Bot Created on: 13/Oct/20 09:32 Start Date: 13/Oct/20 09:32 Worklog Time Spent: 10m Work Description: kgyrtkirk commented on a change in pull request #1553: URL: https://github.com/apache/hive/pull/1553#discussion_r503806798 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java ########## @@ -386,125 +456,81 @@ public boolean sharedWorkOptimization(ParseContext pctx, SharedWorkOptimizerCach LOG.debug("Merging subtree starting at {} into subtree starting at {}", discardableTsOp, retainableTsOp); } else { - ExprNodeDesc newRetainableTsFilterExpr = null; - List<ExprNodeDesc> semijoinExprNodes = new ArrayList<>(); - if (retainableTsOp.getConf().getFilterExpr() != null) { - // Gather SJ expressions and normal expressions - List<ExprNodeDesc> allExprNodesExceptSemijoin = new ArrayList<>(); - splitExpressions(retainableTsOp.getConf().getFilterExpr(), - allExprNodesExceptSemijoin, semijoinExprNodes); - // Create new expressions - if (allExprNodesExceptSemijoin.size() > 1) { - newRetainableTsFilterExpr = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPAnd(), allExprNodesExceptSemijoin); - } else if (allExprNodesExceptSemijoin.size() > 0 && - allExprNodesExceptSemijoin.get(0) instanceof ExprNodeGenericFuncDesc) { - newRetainableTsFilterExpr = allExprNodesExceptSemijoin.get(0); - } - // Push filter on top of children for retainable - pushFilterToTopOfTableScan(optimizerCache, retainableTsOp); + + if (sr.discardableOps.size() > 1) { + throw new RuntimeException("we can't discard more in this path"); } - ExprNodeDesc newDiscardableTsFilterExpr = null; - if (discardableTsOp.getConf().getFilterExpr() != null) { - // If there is a single discardable operator, it is a TableScanOperator - // and it means that we will merge filter expressions for it. 
Thus, we - // might need to remove DPP predicates before doing that - List<ExprNodeDesc> allExprNodesExceptSemijoin = new ArrayList<>(); - splitExpressions(discardableTsOp.getConf().getFilterExpr(), - allExprNodesExceptSemijoin, new ArrayList<>()); - // Create new expressions - if (allExprNodesExceptSemijoin.size() > 1) { - newDiscardableTsFilterExpr = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPAnd(), allExprNodesExceptSemijoin); - } else if (allExprNodesExceptSemijoin.size() > 0 && - allExprNodesExceptSemijoin.get(0) instanceof ExprNodeGenericFuncDesc) { - newDiscardableTsFilterExpr = allExprNodesExceptSemijoin.get(0); - } - // Remove and add semijoin filter from expressions - replaceSemijoinExpressions(discardableTsOp, semijoinExprNodes); - // Push filter on top of children for discardable - pushFilterToTopOfTableScan(optimizerCache, discardableTsOp); + + SharedWorkModel modelR = new SharedWorkModel(retainableTsOp); + SharedWorkModel modelD = new SharedWorkModel(discardableTsOp); + + // Push filter on top of children for retainable + pushFilterToTopOfTableScan(optimizerCache, retainableTsOp); + + if (mode == Mode.RemoveSemijoin || mode == Mode.SubtreeMerge) { + // FIXME: I think idea here is to clear the discardable's semijoin filter Review comment: I made this note because it was not obvious to me what's happening here. I've rephrased it to be easier to understand. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 499942) Time Spent: 40m (was: 0.5h) > Enhance shared work optimizer to merge scans with filters on both sides > ----------------------------------------------------------------------- > > Key: HIVE-24231 > URL: https://issues.apache.org/jira/browse/HIVE-24231 > Project: Hive > Issue Type: Improvement > Reporter: Zoltan Haindrich > Assignee: Zoltan Haindrich > Priority: Major > Labels: pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)