[ 
https://issues.apache.org/jira/browse/HIVE-24231?focusedWorklogId=499957&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-499957
 ]

ASF GitHub Bot logged work on HIVE-24231:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 13/Oct/20 09:54
            Start Date: 13/Oct/20 09:54
    Worklog Time Spent: 10m 
      Work Description: kgyrtkirk commented on a change in pull request #1553:
URL: https://github.com/apache/hive/pull/1553#discussion_r503821169



##########
File path: 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
##########
@@ -386,125 +456,81 @@ public boolean sharedWorkOptimization(ParseContext pctx, 
SharedWorkOptimizerCach
               LOG.debug("Merging subtree starting at {} into subtree starting 
at {}",
                   discardableTsOp, retainableTsOp);
             } else {
-              ExprNodeDesc newRetainableTsFilterExpr = null;
-              List<ExprNodeDesc> semijoinExprNodes = new ArrayList<>();
-              if (retainableTsOp.getConf().getFilterExpr() != null) {
-                // Gather SJ expressions and normal expressions
-                List<ExprNodeDesc> allExprNodesExceptSemijoin = new 
ArrayList<>();
-                splitExpressions(retainableTsOp.getConf().getFilterExpr(),
-                    allExprNodesExceptSemijoin, semijoinExprNodes);
-                // Create new expressions
-                if (allExprNodesExceptSemijoin.size() > 1) {
-                  newRetainableTsFilterExpr = 
ExprNodeGenericFuncDesc.newInstance(
-                      new GenericUDFOPAnd(), allExprNodesExceptSemijoin);
-                } else if (allExprNodesExceptSemijoin.size() > 0 &&
-                    allExprNodesExceptSemijoin.get(0) instanceof 
ExprNodeGenericFuncDesc) {
-                  newRetainableTsFilterExpr = 
allExprNodesExceptSemijoin.get(0);
-                }
-                // Push filter on top of children for retainable
-                pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
+
+              if (sr.discardableOps.size() > 1) {
+                throw new RuntimeException("we can't discard more in this 
path");
               }
-              ExprNodeDesc newDiscardableTsFilterExpr = null;
-              if (discardableTsOp.getConf().getFilterExpr() != null) {
-                // If there is a single discardable operator, it is a 
TableScanOperator
-                // and it means that we will merge filter expressions for it. 
Thus, we
-                // might need to remove DPP predicates before doing that
-                List<ExprNodeDesc> allExprNodesExceptSemijoin = new 
ArrayList<>();
-                splitExpressions(discardableTsOp.getConf().getFilterExpr(),
-                    allExprNodesExceptSemijoin, new ArrayList<>());
-                // Create new expressions
-                if (allExprNodesExceptSemijoin.size() > 1) {
-                  newDiscardableTsFilterExpr = 
ExprNodeGenericFuncDesc.newInstance(
-                      new GenericUDFOPAnd(), allExprNodesExceptSemijoin);
-                } else if (allExprNodesExceptSemijoin.size() > 0 &&
-                    allExprNodesExceptSemijoin.get(0) instanceof 
ExprNodeGenericFuncDesc) {
-                  newDiscardableTsFilterExpr = 
allExprNodesExceptSemijoin.get(0);
-                }
-                // Remove and add semijoin filter from expressions
-                replaceSemijoinExpressions(discardableTsOp, semijoinExprNodes);
-                // Push filter on top of children for discardable
-                pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
+
+              SharedWorkModel modelR = new SharedWorkModel(retainableTsOp);
+              SharedWorkModel modelD = new SharedWorkModel(discardableTsOp);
+
+              // Push filter on top of children for retainable
+              pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
+
+              if (mode == Mode.RemoveSemijoin || mode == Mode.SubtreeMerge) {
+                // FIXME: I think idea here is to clear the discardable's 
semijoin filter
+                // - by using the retainable's (which should be empty in case 
of this mode)
+                replaceSemijoinExpressions(discardableTsOp, 
modelR.getSemiJoinFilter());
               }
+              // Push filter on top of children for discardable
+              pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
+
               // Obtain filter for shared TS operator
-              ExprNodeGenericFuncDesc exprNode = null;
-              if (newRetainableTsFilterExpr != null && 
newDiscardableTsFilterExpr != null) {
-                // Combine
-                exprNode = (ExprNodeGenericFuncDesc) newRetainableTsFilterExpr;
-                if (!exprNode.isSame(newDiscardableTsFilterExpr)) {
-                  // We merge filters from previous scan by ORing with filters 
from current scan
-                  if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) {
-                    List<ExprNodeDesc> newChildren = new 
ArrayList<>(exprNode.getChildren().size() + 1);
-                    for (ExprNodeDesc childExprNode : exprNode.getChildren()) {
-                      if (childExprNode.isSame(newDiscardableTsFilterExpr)) {
-                        // We do not need to do anything, it is in the OR 
expression
-                        break;
-                      }
-                      newChildren.add(childExprNode);
-                    }
-                    if (exprNode.getChildren().size() == newChildren.size()) {
-                      newChildren.add(newDiscardableTsFilterExpr);
-                      exprNode = ExprNodeGenericFuncDesc.newInstance(
-                          new GenericUDFOPOr(),
-                          newChildren);
-                    }
-                  } else {
-                    exprNode = ExprNodeGenericFuncDesc.newInstance(
-                        new GenericUDFOPOr(),
-                        Arrays.asList(exprNode, newDiscardableTsFilterExpr));
-                  }
-                }
+              ExprNodeDesc exprNode = null;
+              if (modelR.normalFilterExpr != null && modelD.normalFilterExpr 
!= null) {
+                exprNode = disjunction(modelR.normalFilterExpr, 
modelD.normalFilterExpr);
               }
-              // Create expression node that will be used for the retainable 
table scan
-              if (!semijoinExprNodes.isEmpty()) {
-                if (exprNode != null) {
-                  semijoinExprNodes.add(0, exprNode);
-                }
-                if (semijoinExprNodes.size() > 1) {
-                  exprNode = ExprNodeGenericFuncDesc.newInstance(
-                      new GenericUDFOPAnd(), semijoinExprNodes);
-                } else {
-                  exprNode = (ExprNodeGenericFuncDesc) 
semijoinExprNodes.get(0);
-                }
+              List<ExprNodeDesc> semiJoinExpr = null;
+              if (mode == Mode.DPPUnion) {
+                assert modelR.semijoinExprNodes != null;
+                assert modelD.semijoinExprNodes != null;
+                ExprNodeDesc disjunction = 
disjunction(conjunction(modelR.semijoinExprNodes), 
conjunction(modelD.semijoinExprNodes));
+                semiJoinExpr = disjunction == null ? null : 
Lists.newArrayList(disjunction);
+              } else {
+                semiJoinExpr = modelR.semijoinExprNodes;
               }
+
+              // Create expression node that will be used for the retainable 
table scan
+              exprNode = conjunction(semiJoinExpr, exprNode);
               // Replace filter
-              retainableTsOp.getConf().setFilterExpr(exprNode);
+              retainableTsOp.getConf().setFilterExpr((ExprNodeGenericFuncDesc) 
exprNode);
               // Replace table scan operator
               List<Operator<? extends OperatorDesc>> allChildren =
                   Lists.newArrayList(discardableTsOp.getChildOperators());
               for (Operator<? extends OperatorDesc> op : allChildren) {
-                discardableTsOp.getChildOperators().remove(op);
                 op.replaceParent(discardableTsOp, retainableTsOp);
                 retainableTsOp.getChildOperators().add(op);
               }
+              discardableTsOp.getChildOperators().clear();
 
               LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp);
             }
 
             // First we remove the input operators of the expression that

Review comment:
       this small block can be done with a function(`adoptChildren`) from the 
next patch; I've not yet noticed that I can use it here as well - I've added 
the method and called it




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 499957)
    Time Spent: 1h 10m  (was: 1h)

> Enhance shared work optimizer to merge scans with filters on both sides
> -----------------------------------------------------------------------
>
>                 Key: HIVE-24231
>                 URL: https://issues.apache.org/jira/browse/HIVE-24231
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 10m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to