shujingyang-db commented on code in PR #52153:
URL: https://github.com/apache/spark/pull/52153#discussion_r2312809264


##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala:
##########
@@ -2785,6 +2785,41 @@ class DataFrameSuite extends QueryTest
     val df1 = df.select("a").orderBy("b").orderBy("all")
     checkAnswer(df1, Seq(Row(1), Row(4)))
   }
+
+  test("SPARK-53401: repartitionById - should partition rows to the specified " +
+    "partition ID") {
+    val numPartitions = 10
+    val df = spark.range(100).withColumn("p_id", col("id") % numPartitions)
+
+    val repartitioned = df.repartitionById(numPartitions, $"p_id")
+    val result = repartitioned.withColumn("actual_p_id", spark_partition_id())
+
+    assert(result.filter(col("p_id") =!= col("actual_p_id")).count() == 0)
+
+    assert(result.rdd.getNumPartitions == numPartitions)
+  }
+
+  test("SPARK-53401: repartitionById - should fail when partition ID is null") {
+    val df = spark.range(10).withColumn("p_id",
+      when(col("id") < 5, col("id")).otherwise(lit(null).cast("long"))
+    )
+    val repartitioned = df.repartitionById(5, $"p_id")
+
+    val e = intercept[SparkException] {
+      repartitioned.collect()
+    }
+    assert(e.getCause.isInstanceOf[IllegalArgumentException])
+    assert(e.getCause.getMessage.contains("The partition ID expression must not be null."))
+  }
+
+  test("SPARK-53401: repartitionById - should fail analysis for non-integral types") {
+    val df = spark.range(5).withColumn("s", lit("a"))
+    val e = intercept[AnalysisException] {
+      df.repartitionById(5, $"s").collect()
+    }
+    // Should fail with type error from DirectShufflePartitionID expression
+    assert(e.getMessage.contains("requires an integral type"))

Review Comment:
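   The failure comes from `Pmod`. It is not raised as an `AnalysisException`; instead the generated code fails to compile while the task is running.
   
   The full error message is: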
   ```
   org.scalatest.exceptions.TestFailedException: "Job aborted due to stage failure: Task 1 in stage 9.0 failed 1 times, most recent failure: Lost task 1.0 in stage 9.0 (TID 20) (192.168.1.72 executor driver): java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 38, Column 71: Failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 38, Column 71: Binary numeric promotion not possible on types "org.apache.spark.unsafe.types.UTF8String" and "int"
        at com.google.common.util.concurrent.AbstractFuture.getDoneValue(AbstractFuture.java:604)
        at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:559)
        at com.google.common.util.concurrent.AbstractFuture$TrustedFuture.get(AbstractFuture.java:114)
        at com.google.common.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:247)
        at com.google.common.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2349)
        at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2317)
        at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2190)
        at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2080)
        at com.google.common.cache.LocalCache.get(LocalCache.java:4017)
        at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4040)
        at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4989)
        at org.apache.spark.util.NonFateSharingLoadingCache.$anonfun$get$2(NonFateSharingCache.scala:108)
        at org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)
        at org.apache.spark.util.NonFateSharingLoadingCache.get(NonFateSharingCache.scala:108)
        at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1490)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:378)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.generate(GenerateUnsafeProjection.scala:327)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.createCodeGeneratedObject(Projection.scala:125)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.createCodeGeneratedObject(Projection.scala:121)
        at org.apache.spark.sql.catalyst.expressions.CodeGeneratorWithInterpretedFallback.createObject(CodeGeneratorWithInterpretedFallback.scala:45)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:152)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.create(Projection.scala:162)
        at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.getPartitionKeyExtractor$1(ShuffleExchangeExec.scala:408)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to