yihua commented on code in PR #18674:
URL: https://github.com/apache/hudi/pull/18674#discussion_r3244129944
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala:
##########
@@ -466,6 +481,28 @@ trait SparkAdapter extends Serializable {
*/
def isVariantShreddingStruct(structType: StructType): Boolean
+ /**
+ * Checks if a StructType is the result of Spark 4.1's PushVariantIntoScan rewriting — i.e.,
+ * every child field carries `VariantMetadata` describing a pushed-down variant extraction.
+ *
+ * Returns false on Spark versions earlier than 4.1 (the rewriting only happens there).
+ */
+ def isVariantProjectionStruct(structType: StructType): Boolean = false
+
+ /**
+ * If `sparkRequiredSchema` contains any field that's a Spark 4.1 variant projection struct
+ * (i.e., the same-named field in `sparkDataSchema` is `VariantType`), returns a row
+ * transformer that takes an InternalRow in the data-schema shape (with full variants) and
+ * produces an InternalRow in the required-schema shape (with each variant column projected
+ * to its requested struct via VariantGet).
+ *
+ * Used on the MOR log-file path: log records carry the full variant on disk, but the merger
+ * expects rows aligned to the post-PushVariantIntoScan required schema. Returns None when
+ * there's nothing to project (cheap fast-path for Spark < 4.1 and for non-variant queries).
+ */
+ def buildVariantProjector(sparkDataSchema: StructType,
+ sparkRequiredSchema: StructType):
Option[InternalRow => InternalRow] = None
Review Comment:
#18739 to follow up
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]