KurtYoung commented on a change in pull request #8499: [FLINK-12575] [table-planner-blink] Introduce planner rules to remove redundant shuffle and collation URL: https://github.com/apache/flink/pull/8499#discussion_r287198770
########## File path: flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecHashAggregate.scala ########## @@ -94,6 +99,53 @@ class BatchExecHashAggregate( isGlobal = true)) } + override def satisfyTraits(requiredTraitSet: RelTraitSet): RelNode = { + val requiredDistribution = requiredTraitSet.getTrait(FlinkRelDistributionTraitDef.INSTANCE) + val canSatisfy = requiredDistribution.getType match { + case SINGLETON => grouping.length == 0 + case HASH_DISTRIBUTED => + val shuffleKeys = requiredDistribution.getKeys + val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*) + if (requiredDistribution.requireStrict) { + shuffleKeys == groupKeysList + } else if (Util.startsWith(shuffleKeys, groupKeysList)) { + // If required distribution is not strict, Hash[a] can satisfy Hash[a, b]. + // so return true if shuffleKeys(Hash[a, b]) start with groupKeys(Hash[a]) + true + } else { + // If partialKey is enabled, try to use partial key to satisfy the required distribution + val tableConfig = FlinkRelOptUtil.getTableConfigFromContext(this) + val partialKeyEnabled = tableConfig.getConf.getBoolean( + PlannerConfigOptions.SQL_OPTIMIZER_SHUFFLE_PARTIAL_KEY_ENABLED) + partialKeyEnabled && groupKeysList.containsAll(shuffleKeys) + } + case _ => false + } + if (!canSatisfy) { + return null + } + + val inputRequiredDistribution = requiredDistribution.getType match { + case SINGLETON => requiredDistribution + case HASH_DISTRIBUTED => + val shuffleKeys = requiredDistribution.getKeys + val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*) + if (requiredDistribution.requireStrict) { + FlinkRelDistribution.hash(grouping, requireStrict = true) + } else if (Util.startsWith(shuffleKeys, groupKeysList)) { + // Hash[a] can satisfy Hash[a, b] + FlinkRelDistribution.hash(grouping, requireStrict = false) + } else { + // use partial key to satisfy the required distribution + FlinkRelDistribution.hash(shuffleKeys.map(grouping(_)).toArray, requireStrict = false) Review comment: why `shuffleKeys.map(grouping(_))`? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services