KurtYoung commented on a change in pull request #8499: [FLINK-12575] [table-planner-blink] Introduce planner rules to remove redundant shuffle and collation URL: https://github.com/apache/flink/pull/8499#discussion_r287204228
########## File path: flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecJoinBase.scala ########## @@ -70,4 +76,150 @@ abstract class BatchExecJoinBase( "JoinConditionFunction", body) } + + /** + * Try to satisfy hash distribution on Non-BroadcastJoin (including SortMergeJoin and + * Non-Broadcast HashJoin). + * + * @param requiredDistribution distribution requirement + * @return a Tuple including 3 element. + * The first element is a flag which indicates whether the requirement can be satisfied. + * The second element is the distribution requirement of left child if the requirement + * can be push down into join. + * The third element is the distribution requirement of right child if the requirement + * can be push down into join. + */ + def satisfyHashDistributionOnNonBroadcastJoin( + requiredDistribution: FlinkRelDistribution) + : (Boolean, FlinkRelDistribution, FlinkRelDistribution) = { + // Only Non-broadcast HashJoin could provide HashDistribution + if (requiredDistribution.getType != HASH_DISTRIBUTED) { + return (false, null, null) + } + // Full outer join cannot provide Hash distribute because it will generate null for left/right + // side if there is no match row. 
+ if (joinType == JoinRelType.FULL) { + return (false, null, null) + } + + val leftKeys = joinInfo.leftKeys + val rightKeys = joinInfo.rightKeys + val leftKeysToRightKeys = leftKeys.zip(rightKeys).toMap + val rightKeysToLeftKeys = rightKeys.zip(leftKeys).toMap + val leftFieldCnt = getLeft.getRowType.getFieldCount + val requiredShuffleKeys = requiredDistribution.getKeys + val requiredLeftShuffleKeys = mutable.ArrayBuffer[Int]() + val requiredRightShuffleKeys = mutable.ArrayBuffer[Int]() + requiredShuffleKeys.foreach { key => + if (key < leftFieldCnt && joinType != JoinRelType.RIGHT) { Review comment: So this condition will include SEMI and ANTI joins, but the following condition branch `joinType == JoinRelType.RIGHT || joinType == JoinRelType.INNER)` will not? I think it would be better to make this explicit. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services