chris-twiner commented on code in PR #50023:
URL: https://github.com/apache/spark/pull/50023#discussion_r1971695592


##########
sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala:
##########
@@ -2841,6 +2860,265 @@ class DatasetSuite extends QueryTest
     checkDataset(Seq(seqMutableSet).toDS(), seqMutableSet)
     checkDataset(Seq(mapMutableSet).toDS(), mapMutableSet)
   }
+
+  // below tests are related to SPARK-49960 and TransformingEncoder usage
+  test("Incorrect derived nullability with TransformingEncoder - non nullable") {
+    val sparkI = spark
+    type T = Tuple2[Seq[Seq[Int]], Seq[Int]]
+    val data: Seq[T] = Seq( ( Seq( Seq(1, 2, 3) ), Seq(1, 2, 3) ) )
+    // for reference only
+    val sparkDataTypeOG = {
+      import sparkI.implicits._
+      val ds = spark.createDataset[Tuple2[Seq[Seq[Int]], Seq[Int]]](data)
+      ds.schema
+    }
+
+    val provider = () =>
+      new Codec[Seq[Int], Seq[Int]]{
+        override def encode(in: Seq[Int]): Seq[Int] = in
+        override def decode(out: Seq[Int]): Seq[Int] = out
+      }
+
+    val transformingSeq =
+      TransformingEncoder[Seq[Int], Seq[Int]](
+        implicitly[ClassTag[Seq[Int]]],
+        IterableEncoder[Seq[Int], Int](implicitly[ClassTag[Seq[Int]]],
+          PrimitiveIntEncoder, false, false),
+        provider
+      )
+
+    val enc =
+      ProductEncoder(
+        implicitly[ClassTag[T]],
+        Seq(
+          EncoderField("_1",
+            IterableEncoder[Seq[Seq[Int]], Seq[Int]](implicitly[ClassTag[Seq[Seq[Int]]]],
+              transformingSeq, false, false), false, Metadata.empty),
+          EncoderField( "_2", transformingSeq, false, Metadata.empty)
+        ),
+        None
+      )
+    val sparkViaAgnostic = {
+      val ds = spark.createDataset(data)(enc)
+      ds.schema
+    }
+    // the nullability without TransformingEncoder nullability (SerializerBuilderHelper)
+    // is incorrect (_2 is inferred as nullable)
+    assert(enc.dataType === sparkViaAgnostic)
+  }
+
+  def provider[A]: () => Codec[V[A], A] = () =>
+    new Codec[V[A], A]{
+      override def encode(in: V[A]): A = in.v
+      override def decode(out: A): V[A] = V(out)
+    }
+
+  def transforming[A](underlying: AgnosticEncoder[A]): TransformingEncoder[V[A], A] =
+    TransformingEncoder[V[A], A](
+      implicitly[ClassTag[V[A]]],
+      underlying,
+      provider
+    )
+
+  val V_INT = StructType(Seq(StructField("v", IntegerType, nullable = false)))
+
+  // "value" usage for single field, a wrapping nullable type is required
+  val OPTION_OF_V_INT = StructType(Seq(StructField("value",
+    V_INT, nullable = true)))
+
+  // product encoder for a non-nullable V
+  val V_OF_INT =
+    ProductEncoder(
+      classTag[V[Int]],
+      Seq(EncoderField("v", PrimitiveIntEncoder, nullable = false, Metadata.empty)),
+      None
+    )
+
+  test("""Encoder derivation with nested TransformingEncoder of OptionEncoder""".stripMargin) {
+    val sparkI = spark
+    type T = V[V[Option[V[Int]]]]
+    val data: Seq[T] = Seq(V(V(None)), V(V(Some(V(1)))))
+    // for reference - datatype will introduce nested classes as expected
+    val sparkDataTypeOG = {
+      import sparkI.implicits._
+      val ds = spark.createDataset[V[V[Option[V[Int]]]]](data)
+      ds.schema
+    }
+
+    /* attempt to behave as if value class semantics except the last product,
+      using a final transforming instead of a product serializes */
+    val enc =
+      transforming(
+        transforming(
+          OptionEncoder(
+            // works
+            // transforming(PrimitiveIntEncoder)

Review Comment:
   no, I've removed that along with the other visual cruft



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to