peter-toth commented on code in PR #54330:
URL: https://github.com/apache/spark/pull/54330#discussion_r2851913028
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala:
##########
@@ -416,63 +480,43 @@ case class KeyGroupedPartitioning(
val result = KeyGroupedShuffleSpec(this, distribution)
if (SQLConf.get.v2BucketingAllowJoinKeysSubsetOfPartitionKeys) {
// If allowing join keys to be subset of clustering keys, we should
create a new
- // `KeyGroupedPartitioning` here that is grouped on the join keys
instead, and use that as
+ // `KeyedPartitioning` here that is grouped on the join keys instead,
and use that as
// the returned shuffle spec.
val joinKeyPositions =
result.keyPositions.map(_.nonEmpty).zipWithIndex.filter(_._1).map(_._2)
- val projectedPartitioning = KeyGroupedPartitioning(expressions,
joinKeyPositions,
- partitionValues, originalPartitionValues)
+ val (projectedExpressions, projectedDataTypes, projectedKeys,
projectedOriginalKeys) =
+ projectKeys(joinKeyPositions)
+ val projectedComparableWrapperFactory =
+
InternalRowComparableWrapper.getInternalRowComparableWrapperFactory(projectedDataTypes)
+ val distinctPartitionKeys =
projectedKeys.distinctBy(projectedComparableWrapperFactory)
+ val projectedPartitioning = copy(expressions = projectedExpressions,
+ partitionKeys = distinctPartitionKeys, originalPartitionKeys =
projectedOriginalKeys)
result.copy(partitioning = projectedPartitioning, joinKeyPositions =
Some(joinKeyPositions))
} else {
result
}
}
- lazy val uniquePartitionValues: Seq[InternalRow] = {
- val internalRowComparableFactory =
- InternalRowComparableWrapper.getInternalRowComparableWrapperFactory(
- expressions.map(_.dataType))
- partitionValues
- .map(internalRowComparableFactory)
- .distinct
- .map(_.row)
- }
+ override def equals(that: Any): Boolean = that match {
+ case k: KeyedPartitioning if this.expressions == k.expressions =>
Review Comment:
`KeyedPartitioning` is an expression, so we have `.semanticEquals()` as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]