[ https://issues.apache.org/jira/browse/HIVE-24231?focusedWorklogId=499957&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-499957 ]
ASF GitHub Bot logged work on HIVE-24231: ----------------------------------------- Author: ASF GitHub Bot Created on: 13/Oct/20 09:54 Start Date: 13/Oct/20 09:54 Worklog Time Spent: 10m Work Description: kgyrtkirk commented on a change in pull request #1553: URL: https://github.com/apache/hive/pull/1553#discussion_r503821169 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java ########## @@ -386,125 +456,81 @@ public boolean sharedWorkOptimization(ParseContext pctx, SharedWorkOptimizerCach LOG.debug("Merging subtree starting at {} into subtree starting at {}", discardableTsOp, retainableTsOp); } else { - ExprNodeDesc newRetainableTsFilterExpr = null; - List<ExprNodeDesc> semijoinExprNodes = new ArrayList<>(); - if (retainableTsOp.getConf().getFilterExpr() != null) { - // Gather SJ expressions and normal expressions - List<ExprNodeDesc> allExprNodesExceptSemijoin = new ArrayList<>(); - splitExpressions(retainableTsOp.getConf().getFilterExpr(), - allExprNodesExceptSemijoin, semijoinExprNodes); - // Create new expressions - if (allExprNodesExceptSemijoin.size() > 1) { - newRetainableTsFilterExpr = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPAnd(), allExprNodesExceptSemijoin); - } else if (allExprNodesExceptSemijoin.size() > 0 && - allExprNodesExceptSemijoin.get(0) instanceof ExprNodeGenericFuncDesc) { - newRetainableTsFilterExpr = allExprNodesExceptSemijoin.get(0); - } - // Push filter on top of children for retainable - pushFilterToTopOfTableScan(optimizerCache, retainableTsOp); + + if (sr.discardableOps.size() > 1) { + throw new RuntimeException("we can't discard more in this path"); } - ExprNodeDesc newDiscardableTsFilterExpr = null; - if (discardableTsOp.getConf().getFilterExpr() != null) { - // If there is a single discardable operator, it is a TableScanOperator - // and it means that we will merge filter expressions for it. Thus, we - // might need to remove DPP predicates before doing that - List<ExprNodeDesc> allExprNodesExceptSemijoin = new ArrayList<>(); - splitExpressions(discardableTsOp.getConf().getFilterExpr(), - allExprNodesExceptSemijoin, new ArrayList<>()); - // Create new expressions - if (allExprNodesExceptSemijoin.size() > 1) { - newDiscardableTsFilterExpr = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPAnd(), allExprNodesExceptSemijoin); - } else if (allExprNodesExceptSemijoin.size() > 0 && - allExprNodesExceptSemijoin.get(0) instanceof ExprNodeGenericFuncDesc) { - newDiscardableTsFilterExpr = allExprNodesExceptSemijoin.get(0); - } - // Remove and add semijoin filter from expressions - replaceSemijoinExpressions(discardableTsOp, semijoinExprNodes); - // Push filter on top of children for discardable - pushFilterToTopOfTableScan(optimizerCache, discardableTsOp); + + SharedWorkModel modelR = new SharedWorkModel(retainableTsOp); + SharedWorkModel modelD = new SharedWorkModel(discardableTsOp); + + // Push filter on top of children for retainable + pushFilterToTopOfTableScan(optimizerCache, retainableTsOp); + + if (mode == Mode.RemoveSemijoin || mode == Mode.SubtreeMerge) { + // FIXME: I think idea here is to clear the discardable's semijoin filter + // - by using the retainable's (which should be empty in case of this mode) + replaceSemijoinExpressions(discardableTsOp, modelR.getSemiJoinFilter()); } + // Push filter on top of children for discardable + pushFilterToTopOfTableScan(optimizerCache, discardableTsOp); + // Obtain filter for shared TS operator - ExprNodeGenericFuncDesc exprNode = null; - if (newRetainableTsFilterExpr != null && newDiscardableTsFilterExpr != null) { - // Combine - exprNode = (ExprNodeGenericFuncDesc) newRetainableTsFilterExpr; - if (!exprNode.isSame(newDiscardableTsFilterExpr)) { - // We merge filters from previous scan by ORing with filters from current scan - if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) { - List<ExprNodeDesc> newChildren = new ArrayList<>(exprNode.getChildren().size() + 1); - for (ExprNodeDesc childExprNode : exprNode.getChildren()) { - if (childExprNode.isSame(newDiscardableTsFilterExpr)) { - // We do not need to do anything, it is in the OR expression - break; - } - newChildren.add(childExprNode); - } - if (exprNode.getChildren().size() == newChildren.size()) { - newChildren.add(newDiscardableTsFilterExpr); - exprNode = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPOr(), - newChildren); - } - } else { - exprNode = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPOr(), - Arrays.asList(exprNode, newDiscardableTsFilterExpr)); - } - } + ExprNodeDesc exprNode = null; + if (modelR.normalFilterExpr != null && modelD.normalFilterExpr != null) { + exprNode = disjunction(modelR.normalFilterExpr, modelD.normalFilterExpr); } - // Create expression node that will be used for the retainable table scan - if (!semijoinExprNodes.isEmpty()) { - if (exprNode != null) { - semijoinExprNodes.add(0, exprNode); - } - if (semijoinExprNodes.size() > 1) { - exprNode = ExprNodeGenericFuncDesc.newInstance( - new GenericUDFOPAnd(), semijoinExprNodes); - } else { - exprNode = (ExprNodeGenericFuncDesc) semijoinExprNodes.get(0); - } + List<ExprNodeDesc> semiJoinExpr = null; + if (mode == Mode.DPPUnion) { + assert modelR.semijoinExprNodes != null; + assert modelD.semijoinExprNodes != null; + ExprNodeDesc disjunction = disjunction(conjunction(modelR.semijoinExprNodes), conjunction(modelD.semijoinExprNodes)); + semiJoinExpr = disjunction == null ? null : Lists.newArrayList(disjunction); + } else { + semiJoinExpr = modelR.semijoinExprNodes; } + + // Create expression node that will be used for the retainable table scan + exprNode = conjunction(semiJoinExpr, exprNode); // Replace filter - retainableTsOp.getConf().setFilterExpr(exprNode); + retainableTsOp.getConf().setFilterExpr((ExprNodeGenericFuncDesc) exprNode); // Replace table scan operator List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(discardableTsOp.getChildOperators()); for (Operator<? extends OperatorDesc> op : allChildren) { - discardableTsOp.getChildOperators().remove(op); op.replaceParent(discardableTsOp, retainableTsOp); retainableTsOp.getChildOperators().add(op); } + discardableTsOp.getChildOperators().clear(); LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp); } // First we remove the input operators of the expression that Review comment: this small block can be done with a function(`adoptChildren`) from the next patch; I've not yet noticed that I can use it here as well - I've added the method and called it ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 499957) Time Spent: 1h 10m (was: 1h) > Enhance shared work optimizer to merge scans with filters on both sides > ----------------------------------------------------------------------- > > Key: HIVE-24231 > URL: https://issues.apache.org/jira/browse/HIVE-24231 > Project: Hive > Issue Type: Improvement > Reporter: Zoltan Haindrich > Assignee: Zoltan Haindrich > Priority: Major > Labels: pull-request-available > Time Spent: 1h 10m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)