KurtYoung commented on a change in pull request #8499: [FLINK-12575] 
[table-planner-blink] Introduce planner rules to remove redundant shuffle and 
collation
URL: https://github.com/apache/flink/pull/8499#discussion_r287205339
 
 

 ##########
 File path: 
flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecJoinBase.scala
 ##########
 @@ -70,4 +76,150 @@ abstract class BatchExecJoinBase(
       "JoinConditionFunction",
       body)
   }
+
+  /**
+    * Try to satisfy hash distribution on Non-BroadcastJoin (including 
SortMergeJoin and
+    * Non-Broadcast HashJoin).
+    *
+    * @param requiredDistribution distribution requirement
+    * @return a tuple of 3 elements.
+    *         The first element is a flag which indicates whether the 
requirement can be satisfied.
+    *         The second element is the distribution requirement of the left child 
if the requirement
+    *         can be pushed down into the join.
+    *         The third element is the distribution requirement of the right child 
if the requirement
+    *         can be pushed down into the join.
+    */
+  def satisfyHashDistributionOnNonBroadcastJoin(
+      requiredDistribution: FlinkRelDistribution)
+  : (Boolean, FlinkRelDistribution, FlinkRelDistribution) = {
+    // Only Non-broadcast HashJoin could provide HashDistribution
+    if (requiredDistribution.getType != HASH_DISTRIBUTED) {
+      return (false, null, null)
+    }
+    // Full outer join cannot provide a hash distribution because it generates 
nulls for the left/right
+    // side when there is no matching row.
+    if (joinType == JoinRelType.FULL) {
+      return (false, null, null)
+    }
+
+    val leftKeys = joinInfo.leftKeys
+    val rightKeys = joinInfo.rightKeys
+    val leftKeysToRightKeys = leftKeys.zip(rightKeys).toMap
+    val rightKeysToLeftKeys = rightKeys.zip(leftKeys).toMap
+    val leftFieldCnt = getLeft.getRowType.getFieldCount
+    val requiredShuffleKeys = requiredDistribution.getKeys
+    val requiredLeftShuffleKeys = mutable.ArrayBuffer[Int]()
+    val requiredRightShuffleKeys = mutable.ArrayBuffer[Int]()
+    requiredShuffleKeys.foreach { key =>
+      if (key < leftFieldCnt && joinType != JoinRelType.RIGHT) {
+        leftKeysToRightKeys.get(key) match {
+          case Some(rk) =>
+            requiredLeftShuffleKeys += key
+            requiredRightShuffleKeys += rk
+          case None if requiredDistribution.requireStrict =>
+            // Cannot satisfy the required hash distribution because the required 
distribution is strict,
+            // but the key has no matching join key on the right side
+            return (false, null, null)
+          case _ => // do nothing
+        }
+      } else if (key >= leftFieldCnt &&
+        (joinType == JoinRelType.RIGHT || joinType == JoinRelType.INNER)) {
+        val keysOnRightChild = key - leftFieldCnt
+        rightKeysToLeftKeys.get(keysOnRightChild) match {
+          case Some(lk) =>
+            requiredLeftShuffleKeys += lk
+            requiredRightShuffleKeys += keysOnRightChild
+          case None if requiredDistribution.requireStrict =>
+            // Cannot satisfy the required hash distribution because the required 
distribution is strict,
+            // but the key has no matching join key on the left side
+            return (false, null, null)
+          case _ => // do nothing
+        }
+      } else {
+        // cannot satisfy the required hash distribution if the required shuffle 
keys do not come from
+        // the left side when the join is a LOJ, or do not come from the right side when 
the join is a ROJ.
+        return (false, null, null)
+      }
+    }
+    if (requiredLeftShuffleKeys.isEmpty) {
+      // the join cannot satisfy the required hash distribution
+      // because the required input shuffle keys are empty
+      return (false, null, null)
+    }
+
+    val (leftShuffleKeys, rightShuffleKeys) = if (joinType == 
JoinRelType.INNER &&
+      !requiredDistribution.requireStrict) {
+      (requiredLeftShuffleKeys.distinct, requiredRightShuffleKeys.distinct)
+    } else {
+      (requiredLeftShuffleKeys, requiredRightShuffleKeys)
+    }
+    (true,
+      FlinkRelDistribution.hash(ImmutableIntList.of(leftShuffleKeys: _*), 
requireStrict = true),
+      FlinkRelDistribution.hash(ImmutableIntList.of(rightShuffleKeys: _*), 
requireStrict = true))
+  }
+
+  /**
+    * Try to satisfy the given required traits on BroadcastJoin (including 
Broadcast-HashJoin and
+    * NestedLoopJoin).
+    *
+    * @param requiredTraitSet requirement traitSets
+    * @return Equivalent Join which satisfies required traitSet, return null if
+    *         requirement cannot be satisfied.
+    */
+  protected def satisfyTraitsOnBroadcastJoin(
+      requiredTraitSet: RelTraitSet,
+      leftIsBroadcast: Boolean): RelNode = {
+    val requiredDistribution = 
requiredTraitSet.getTrait(FlinkRelDistributionTraitDef.INSTANCE)
+    val keys = requiredDistribution.getKeys
+    val left = getLeft
+    val right = getRight
+    val leftFieldCnt = left.getRowType.getFieldCount
+    val canSatisfy = requiredDistribution.getType match {
+      case HASH_DISTRIBUTED | RANGE_DISTRIBUTED =>
+        // required distribution can be satisfied only if all distribution keys 
come from
+        // the non-broadcast side of the BroadcastJoin
+        if (leftIsBroadcast) {
+          // all distribution keys must come from right child
+          keys.forall(_ >= leftFieldCnt)
+        } else {
+          // all distribution keys must come from left child
+          keys.forall(_ < leftFieldCnt)
+        }
+      // SINGLETON, BROADCAST_DISTRIBUTED, ANY, RANDOM_DISTRIBUTED, 
ROUND_ROBIN_DISTRIBUTED
+      // distribution cannot be pushed down.
+      case _ => false
+    }
+    if (!canSatisfy) {
+      return null
+    }
+
+    val keysInProbeSide = if (leftIsBroadcast) {
+      ImmutableIntList.of(keys.map(_ - leftFieldCnt): _ *)
+    } else {
+      keys
+    }
+
+    val inputRequiredDistribution = requiredDistribution.getType match {
+      case HASH_DISTRIBUTED =>
+        FlinkRelDistribution.hash(keysInProbeSide, 
requiredDistribution.requireStrict)
+      case RANGE_DISTRIBUTED =>
+        FlinkRelDistribution.range(keysInProbeSide)
+    }
+    // remove collation traits from input traits and provided traits
 
 Review comment:
   Why not build from an empty trait set and add the traits you can provide?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to