ericm-db commented on code in PR #49277:
URL: https://github.com/apache/spark/pull/49277#discussion_r1915281221


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala:
##########
@@ -161,24 +179,67 @@ class StateSchemaCompatibilityChecker(
    * @param ignoreValueSchema - whether to ignore value schema or not
    */
   private def check(
-      oldSchema: StateStoreColFamilySchema,
+      oldSchemas: List[StateStoreColFamilySchema],
       newSchema: StateStoreColFamilySchema,
-      ignoreValueSchema: Boolean) : Unit = {
-    val (storedKeySchema, storedValueSchema) = (oldSchema.keySchema,
-      oldSchema.valueSchema)
+      ignoreValueSchema: Boolean,
+      schemaEvolutionEnabled: Boolean): (StateStoreColFamilySchema, Boolean) = 
{
+
+    def incrementSchemaId(id: Short): Short = (id + 1).toShort
+
+    val mostRecentSchema = oldSchemas.last
+    // Initialize with old schema IDs
+    val resultSchema = newSchema.copy(
+      keySchemaId = mostRecentSchema.keySchemaId,
+      valueSchemaId = mostRecentSchema.valueSchemaId
+    )
+    val (storedKeySchema, storedValueSchema) = (mostRecentSchema.keySchema,
+      mostRecentSchema.valueSchema)
     val (keySchema, valueSchema) = (newSchema.keySchema, newSchema.valueSchema)
 
     if (storedKeySchema.equals(keySchema) &&
       (ignoreValueSchema || storedValueSchema.equals(valueSchema))) {
       // schema is exactly same
+      (mostRecentSchema, false)
     } else if (!schemasCompatible(storedKeySchema, keySchema)) {
       throw 
StateStoreErrors.stateStoreKeySchemaNotCompatible(storedKeySchema.toString,
         keySchema.toString)
+    } else if (!ignoreValueSchema && schemaEvolutionEnabled) {
+      // Check value schema evolution
+      // Sort schemas by most recent to least recent
+      val oldAvroSchemas = oldSchemas.sortBy(_.valueSchemaId).reverse.map { 
oldSchema =>
+        SchemaConverters.toAvroTypeWithDefaults(oldSchema.valueSchema)
+      }.asJava
+      val l = oldSchemas.sortBy(_.valueSchemaId).reverse.map { oldSchema =>
+        SchemaConverters.toAvroTypeWithDefaults(oldSchema.valueSchema)
+      }
+      val newAvroSchema = SchemaConverters.toAvroTypeWithDefaults(valueSchema)
+
+      val validator = new 
SchemaValidatorBuilder().canReadStrategy.validateAll()
+      try {
+        validator.validate(newAvroSchema, oldAvroSchemas)
+      } catch {
+        case s: SchemaValidationException =>
+          throw StateStoreErrors.stateStoreInvalidValueSchemaEvolution(

Review Comment:
   How does this error message look?
   ```
   [info] - test that invalid schema evolution fails query for column family 
(encoding = avro) *** FAILED *** (1 second, 914 milliseconds)
   [info]   
org.apache.spark.sql.execution.streaming.state.StateStoreInvalidValueSchemaEvolution:
 [STATE_STORE_INVALID_VALUE_SCHEMA_EVOLUTION] Schema evolution is not possible 
new value_schema=StructType(StructField(value,LongType,false)) and old 
value_schema=StructType(StructField(value,IntegerType,false))
   [info] Please check 
https://avro.apache.org/docs/1.11.1/specification/_print/#schema-resolution for 
valid schema evolution. SQLSTATE: XXKST
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to