[ https://issues.apache.org/jira/browse/HIVE-24564?focusedWorklogId=527940&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-527940 ]

ASF GitHub Bot logged work on HIVE-24564:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 24/Dec/20 02:00
            Start Date: 24/Dec/20 02:00
    Worklog Time Spent: 10m 
      Work Description: jcamachor commented on a change in pull request #1811:
URL: https://github.com/apache/hive/pull/1811#discussion_r548344387



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
##########
@@ -728,6 +769,134 @@ private void applyFilterTransitivity(JoinOperator join, int targetPos, OpWalkerI
         }
       }
     }
+
+    private void extractColumnExprNodes(ExprNodeDesc exprNodeDesc, List<ExprNodeColumnDesc> result) {
+      if (exprNodeDesc instanceof ExprNodeColumnDesc) {
+        result.add((ExprNodeColumnDesc) exprNodeDesc);
+        return;
+      }
+      if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+        for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
+          extractColumnExprNodes(child, result);
+        }
+      }
+    }
+
+    private ExprNodeDesc replaceColumnExprNodes(ExprNodeDesc exprNodeDesc, Map<ExprNodeDesc, ExprNodeDesc> replaceMap) {
+      if (exprNodeDesc instanceof ExprNodeColumnDesc) {
+        return replaceMap.getOrDefault(exprNodeDesc, exprNodeDesc);
+      }
+      if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc exprNodeGenericFuncDesc = (ExprNodeGenericFuncDesc) exprNodeDesc.clone();
+        List<ExprNodeDesc> replacedChildren = new ArrayList<>(exprNodeDesc.getChildren().size());
+        for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
+          replacedChildren.add(replaceColumnExprNodes(child, replaceMap));
+        }
+        exprNodeGenericFuncDesc.setChildren(replacedChildren);
+        return exprNodeGenericFuncDesc;
+      }
+
+      return exprNodeDesc;
+    }
+
+    private Map<ExprNodeDesc, String> walk(Operator<?> operator, List<ExprNodeColumnDesc> exprNodeDescList) {
+      Map<ExprNodeDesc, String> equalities;
+      if (operator instanceof CommonJoinOperator) {
+        equalities = processJoinEq((CommonJoinOperator<?>)operator, exprNodeDescList);
+      } else {
+        equalities = processDefaultEq(operator, exprNodeDescList);
+      }
+      return equalities;
+    }
+
+    private Map<ExprNodeDesc, String> processJoinEq(
+            CommonJoinOperator<?> join, List<ExprNodeColumnDesc> exprNodeDescList) {
+      if (exprNodeDescList.isEmpty()) {
+        return Collections.emptyMap();
+      }
+      Map<ExprNodeDesc, String> equalities = new HashMap<>();
+      for (ExprNodeColumnDesc exprNodeDesc : exprNodeDescList) {
+        ExprNodeDesc mappedColExpr = join.getColumnExprMap().get(exprNodeDesc.getColumn());
+        if (!(mappedColExpr instanceof ExprNodeColumnDesc)) {
+          continue;
+        }
+        String mappedColName = ((ExprNodeColumnDesc)mappedColExpr).getColumn();
+        int sideIndex = join.getConf().getReversedExprs().get(exprNodeDesc.getColumn());
+        Operator<?> parentRSOperator = join.getParentOperators().get(sideIndex);
+        for (int i = 0; i < join.getConf().getJoinKeys()[sideIndex].length; ++i) {
+          ExprNodeDesc keyExpr = join.getConf().getJoinKeys()[sideIndex][i];
+          if (!keyExpr.isSame(parentRSOperator.getColumnExprMap().get(mappedColName))) {
+            continue;
+          }
+
+          // exprNodeDesc is join key
+          // find the other key in the join expression
+          Operator<?> otherParentRSOperator = join.getParentOperators().get(1 - sideIndex);
+          for (Entry<String, ExprNodeDesc> joinMapEntry : join.getColumnExprMap().entrySet()) {
+            if (join.getConf().getReversedExprs().get(joinMapEntry.getKey()) != 1 - sideIndex) {
+              continue;
+            }
+
+            String otherColumnName = ((ExprNodeColumnDesc) joinMapEntry.getValue()).getColumn();
+            ExprNodeDesc mappedOtherKeyExpr = otherParentRSOperator.getColumnExprMap().get(otherColumnName);
+            ExprNodeDesc otherKeyExpr = join.getConf().getJoinKeys()[1 - sideIndex][i];
+            if (mappedOtherKeyExpr != null && otherKeyExpr.isSame(mappedOtherKeyExpr)) {
+              equalities.put(exprNodeDesc, joinMapEntry.getKey());
+            }
+          }
+        }
+      }
+
+      for (Operator<?> parent : join.getParentOperators()) {
+        equalities.putAll(walk(parent, exprNodeDescList));
+      }
+
+      return equalities;
+    }
+
+    private Map<ExprNodeDesc, String> processDefaultEq(
+            Operator<?> operator, List<ExprNodeColumnDesc> exprNodeDescList) {
+      if (exprNodeDescList.isEmpty()) {
+        return Collections.emptyMap();
+      }
+
+      Map<String, ExprNodeDesc> columnExprMap = operator.getColumnExprMap();
+      if (columnExprMap == null) {

Review comment:
       When would this be null? Can we add a comment?
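
For readers following the hunk above, here is a hypothetical usage sketch of the two recursive helpers (extractColumnExprNodes / replaceColumnExprNodes). It assumes it runs inside the same class that defines them; the column names, type infos, and the replacement target are made up for illustration and are not taken from the patch.

{code:java}
// Hypothetical illustration (not part of the patch): build the predicate
// (wr_on = 1), collect its column references, then rewrite wr_on into ws_on
// through a substitution map.
ExprNodeColumnDesc wrOn =
    new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "wr_on", "wr", false);
ExprNodeDesc predicate = new ExprNodeGenericFuncDesc(
    TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(),
    Arrays.asList(wrOn, new ExprNodeConstantDesc(1L)));

List<ExprNodeColumnDesc> columns = new ArrayList<>();
extractColumnExprNodes(predicate, columns);        // columns == [wr_on]

Map<ExprNodeDesc, ExprNodeDesc> replaceMap = new HashMap<>();
replaceMap.put(wrOn,
    new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "ws_on", "ws1", false));

// Function nodes are cloned, so the original predicate is left untouched.
ExprNodeDesc rewritten = replaceColumnExprNodes(predicate, replaceMap);  // (ws_on = 1)
{code}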

##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
##########
@@ -728,6 +769,134 @@ private void applyFilterTransitivity(JoinOperator join, int targetPos, OpWalkerI
         }
       }
     }
+
+    private void extractColumnExprNodes(ExprNodeDesc exprNodeDesc, List<ExprNodeColumnDesc> result) {
+      if (exprNodeDesc instanceof ExprNodeColumnDesc) {
+        result.add((ExprNodeColumnDesc) exprNodeDesc);
+        return;
+      }
+      if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+        for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
+          extractColumnExprNodes(child, result);
+        }
+      }
+    }
+
+    private ExprNodeDesc replaceColumnExprNodes(ExprNodeDesc exprNodeDesc, Map<ExprNodeDesc, ExprNodeDesc> replaceMap) {
+      if (exprNodeDesc instanceof ExprNodeColumnDesc) {
+        return replaceMap.getOrDefault(exprNodeDesc, exprNodeDesc);
+      }
+      if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc exprNodeGenericFuncDesc = (ExprNodeGenericFuncDesc) exprNodeDesc.clone();
+        List<ExprNodeDesc> replacedChildren = new ArrayList<>(exprNodeDesc.getChildren().size());
+        for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
+          replacedChildren.add(replaceColumnExprNodes(child, replaceMap));
+        }
+        exprNodeGenericFuncDesc.setChildren(replacedChildren);
+        return exprNodeGenericFuncDesc;
+      }
+
+      return exprNodeDesc;
+    }
+
+    private Map<ExprNodeDesc, String> walk(Operator<?> operator, List<ExprNodeColumnDesc> exprNodeDescList) {

Review comment:
       Consider changing method name.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 527940)
    Time Spent: 20m  (was: 10m)

> Extend PPD filter transitivity to be able to discover new opportunities
> -----------------------------------------------------------------------
>
>                 Key: HIVE-24564
>                 URL: https://issues.apache.org/jira/browse/HIVE-24564
>             Project: Hive
>          Issue Type: Improvement
>          Components: Logical Optimizer
>            Reporter: Krisztian Kasa
>            Assignee: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> If a predicate references a value column of one of the parent ReduceSink 
> operators of a Join, the predicate cannot be copied and pushed down to the 
> other side of the join. However, if a parent equijoin exists in the branch 
> of the RS where 
>  1. the referenced value column is a key column of that join
>  2. and the other side of that join expression is the key column of the RS,
>  then the column in the predicate can be replaced and the new predicate can 
> be pushed down.
> {code:java}
>                                    Join(... = wr_on)
>                                   /                 \
>                                 ...                  RS(key: wr_on)
>                                                           |
>                                               Join(ws1.ws_on = ws2.ws_on)
>                                               (ws1.ws_on, ws2.ws_on, wr_on)
>                                               /                         \
>                                   RS(key:ws_on)                          RS(key:ws_on)
>                                     (value: wr_on)
>                                        |                                       |
>                            Join(ws1.ws_on = wr.wr_on)                       TS(ws2)
>                            /                        \
>                      RS(key:ws_on)              RS(key:wr_on)
>                            |                        |
>                         TS(ws1)                   TS(wr)
> {code}
> A predicate like
> {code}
> (wr_on in (...))
> {code}
> cannot be pushed to TS(ws2) because wr_on is not a key column in 
> Join(ws1.ws_on = ws2.ws_on). But we know that wr_on equals ws_on 
> because of the join in the left branch. 
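
For illustration, here is a rough sketch of how the new helpers could realize the rewrite described above. The predicate and parentRS variables, as well as the way the replacement column is constructed, are assumptions for the sake of the example rather than the patch's actual call sites.

{code:java}
// Assumed driver code (not the actual patch): starting from (wr_on in (...)),
// walking the parent operators finds that wr_on is equated with ws_on by the
// join in the left branch, so the predicate can be rewritten and pushed
// towards TS(ws2).
List<ExprNodeColumnDesc> referenced = new ArrayList<>();
extractColumnExprNodes(predicate, referenced);              // collects wr_on

// 'parentRS' stands for the ReduceSink above the join; walk() dispatches to
// processJoinEq()/processDefaultEq() while moving up the operator tree.
Map<ExprNodeDesc, String> equalities = walk(parentRS, referenced); // wr_on -> ws_on

Map<ExprNodeDesc, ExprNodeDesc> replaceMap = new HashMap<>();
for (ExprNodeColumnDesc col : referenced) {
  String equalColumn = equalities.get(col);
  if (equalColumn != null) {
    replaceMap.put(col, new ExprNodeColumnDesc(
        col.getTypeInfo(), equalColumn, col.getTabAlias(),
        col.getIsPartitionColOrVirtualCol()));
  }
}
ExprNodeDesc pushable = replaceColumnExprNodes(predicate, replaceMap); // (ws_on in (...))
{code}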



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
