gene-db commented on code in PR #50517: URL: https://github.com/apache/spark/pull/50517#discussion_r2031702877
########## common/utils/src/main/resources/error/error-conditions.json: ########## @@ -3330,7 +3330,7 @@ }, "INVALID_SINGLE_VARIANT_COLUMN" : { "message" : [ - "The `singleVariantColumn` option cannot be used if there is also a user specified schema." + "User specified schema <schema> is invalid when the `singleVariantColumn` option is enabled." Review Comment: Are we allowed to change existing error codes/messages? ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DataSourceOptions.scala: ########## @@ -71,4 +76,38 @@ object DataSourceOptions { // as a single VARIANT type column in the table with the given column name. // E.g. spark.read.format("<data-source-format>").option("singleVariantColumn", "colName") val SINGLE_VARIANT_COLUMN = "singleVariantColumn" + // The common option name for all data sources that support corrupt records. In case of a parsing + // error, the record will be stored as a string in the column with the given name. + // Theoretically, the behavior of this option is not affected by the parsing mode + // (PERMISSIVE/FAILFAST/DROPMALFORMED). However, the corrupt record is only visible to the user + // when in PERMISSIVE mode, because the queries will fail in FAILFAST mode, or the row containing + // the corrupt record will be dropped in DROPMALFORMED mode. + val COLUMN_NAME_OF_CORRUPT_RECORD = "columnNameOfCorruptRecord" + + // When `singleVariantColumn` is enabled and there is a user-specified schema, the schema must + // either be a variant field, or a variant field plus a corrupt column field. + def validateSingleVariantColumn( + options: CaseInsensitiveMap[String], + userSpecifiedSchema: Option[StructType]): Unit = { + (options.get(SINGLE_VARIANT_COLUMN), userSpecifiedSchema) match { + case (Some(col), Some(schema)) => Review Comment: NIT: `col` -> `variantColName` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org