KurtYoung commented on a change in pull request #8499: [FLINK-12575] [table-planner-blink] Introduce planner rules to remove redundant shuffle and collation URL: https://github.com/apache/flink/pull/8499#discussion_r287205339
########## File path: flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecJoinBase.scala ########## @@ -70,4 +76,150 @@ abstract class BatchExecJoinBase( "JoinConditionFunction", body) } + + /** + * Try to satisfy hash distribution on Non-BroadcastJoin (including SortMergeJoin and + * Non-Broadcast HashJoin). + * + * @param requiredDistribution distribution requirement + * @return a Tuple including 3 element. + * The first element is a flag which indicates whether the requirement can be satisfied. + * The second element is the distribution requirement of left child if the requirement + * can be push down into join. + * The third element is the distribution requirement of right child if the requirement + * can be push down into join. + */ + def satisfyHashDistributionOnNonBroadcastJoin( + requiredDistribution: FlinkRelDistribution) + : (Boolean, FlinkRelDistribution, FlinkRelDistribution) = { + // Only Non-broadcast HashJoin could provide HashDistribution + if (requiredDistribution.getType != HASH_DISTRIBUTED) { + return (false, null, null) + } + // Full outer join cannot provide Hash distribute because it will generate null for left/right + // side if there is no match row. 
+ if (joinType == JoinRelType.FULL) { + return (false, null, null) + } + + val leftKeys = joinInfo.leftKeys + val rightKeys = joinInfo.rightKeys + val leftKeysToRightKeys = leftKeys.zip(rightKeys).toMap + val rightKeysToLeftKeys = rightKeys.zip(leftKeys).toMap + val leftFieldCnt = getLeft.getRowType.getFieldCount + val requiredShuffleKeys = requiredDistribution.getKeys + val requiredLeftShuffleKeys = mutable.ArrayBuffer[Int]() + val requiredRightShuffleKeys = mutable.ArrayBuffer[Int]() + requiredShuffleKeys.foreach { key => + if (key < leftFieldCnt && joinType != JoinRelType.RIGHT) { + leftKeysToRightKeys.get(key) match { + case Some(rk) => + requiredLeftShuffleKeys += key + requiredRightShuffleKeys += rk + case None if requiredDistribution.requireStrict => + // Cannot satisfy required hash distribution due to required distribution is restrict + // however the key is not found in right + return (false, null, null) + case _ => // do nothing + } + } else if (key >= leftFieldCnt && + (joinType == JoinRelType.RIGHT || joinType == JoinRelType.INNER)) { + val keysOnRightChild = key - leftFieldCnt + rightKeysToLeftKeys.get(keysOnRightChild) match { + case Some(lk) => + requiredLeftShuffleKeys += lk + requiredRightShuffleKeys += keysOnRightChild + case None if requiredDistribution.requireStrict => + // Cannot satisfy required hash distribution due to required distribution is restrict + // however the key is not found in left + return (false, null, null) + case _ => // do nothing + } + } else { + // cannot satisfy required hash distribution if requirement shuffle keys are not come from + // left side when Join is LOJ or are not come from right side when Join is ROJ. 
+ return (false, null, null) + } + } + if (requiredLeftShuffleKeys.isEmpty) { + // the join can not satisfy the required hash distribution + // due to the required input shuffle keys are empty + return (false, null, null) + } + + val (leftShuffleKeys, rightShuffleKeys) = if (joinType == JoinRelType.INNER && + !requiredDistribution.requireStrict) { + (requiredLeftShuffleKeys.distinct, requiredRightShuffleKeys.distinct) + } else { + (requiredLeftShuffleKeys, requiredRightShuffleKeys) + } + (true, + FlinkRelDistribution.hash(ImmutableIntList.of(leftShuffleKeys: _*), requireStrict = true), + FlinkRelDistribution.hash(ImmutableIntList.of(rightShuffleKeys: _*), requireStrict = true)) + } + + /** + * Try to satisfy the given required traits on BroadcastJoin (including Broadcast-HashJoin and + * NestedLoopJoin). + * + * @param requiredTraitSet requirement traitSets + * @return Equivalent Join which satisfies required traitSet, return null if + * requirement cannot be satisfied. + */ + protected def satisfyTraitsOnBroadcastJoin( + requiredTraitSet: RelTraitSet, + leftIsBroadcast: Boolean): RelNode = { + val requiredDistribution = requiredTraitSet.getTrait(FlinkRelDistributionTraitDef.INSTANCE) + val keys = requiredDistribution.getKeys + val left = getLeft + val right = getRight + val leftFieldCnt = left.getRowType.getFieldCount + val canSatisfy = requiredDistribution.getType match { + case HASH_DISTRIBUTED | RANGE_DISTRIBUTED => + // required distribution can be satisfied only if distribution keys all from + // non-broadcast side of BroadcastJoin + if (leftIsBroadcast) { + // all distribution keys must come from right child + keys.forall(_ >= leftFieldCnt) + } else { + // all distribution keys must come from left child + keys.forall(_ < leftFieldCnt) + } + // SINGLETON, BROADCAST_DISTRIBUTED, ANY, RANDOM_DISTRIBUTED, ROUND_ROBIN_DISTRIBUTED + // distribution cannot be pushed down. 
+ case _ => false + } + if (!canSatisfy) { + return null + } + + val keysInProbeSide = if (leftIsBroadcast) { + ImmutableIntList.of(keys.map(_ - leftFieldCnt): _ *) + } else { + keys + } + + val inputRequiredDistribution = requiredDistribution.getType match { + case HASH_DISTRIBUTED => + FlinkRelDistribution.hash(keysInProbeSide, requiredDistribution.requireStrict) + case RANGE_DISTRIBUTED => + FlinkRelDistribution.range(keysInProbeSide) + } + // remove collation traits from input traits and provided traits Review comment: Why not build from an empty trait set and add the traits you can provide? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services