dejankrak-db commented on code in PR #50937: URL: https://github.com/apache/spark/pull/50937#discussion_r2099022053
########## sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4: ########## @@ -191,11 +191,14 @@ statement | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference (commentSpec | locationSpec | + collationSpec | (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace | ALTER namespace identifierReference SET (DBPROPERTIES | PROPERTIES) propertyList #setNamespaceProperties | ALTER namespace identifierReference UNSET (DBPROPERTIES | PROPERTIES) propertyList #unsetNamespaceProperties + | ALTER namespace identifierReference + SET collationSpec #setNamespaceCollation Review Comment: This should be without SET, per ref spec, similar as in ALTER TABLE command: ALTER TABLE identifierReference collationSpec ########## sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala: ########## @@ -918,6 +918,20 @@ class QueryCompilationErrorsSuite } } + test("SPARK-52219: the schema level collations feature is unsupported") { + // TODO: when schema level collations are supported, change this test to set the flag to false + Seq( + "CREATE SCHEMA test_schema DEFAULT COLLATION UNICODE", + "ALTER SCHEMA test_schema SET DEFAULT COLLATION UNICODE" Review Comment: Should remove SET once parser is fixed ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala: ########## @@ -646,6 +646,17 @@ case class SetNamespaceProperties( copy(namespace = newChild) } +/** + * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET DEFAULT COLLATION command. Review Comment: Extra SET keyword ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala: ########## @@ -32,17 +33,25 @@ import org.apache.spark.sql.types.{DataType, StringType} */ object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - fetchDefaultCollation(plan) match { + val objectLevelDefaultCollation = fetchObjectLevelDefaultCollation(plan) + val schemaLevelDefaultCollation = fetchSchemaLevelDefaultCollation(plan) Review Comment: Hmm, I see that schemaLevelDefaultCollation is also used below in tryToInheritCollationIfMissing, where it is used if table collation is not specified. Still, this is just in specific cases - consider whether fetching schema collation can be run only if required. ########## sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala: ########## @@ -129,6 +142,91 @@ abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSessi } } + Seq( + // (schemaDefaultCollation, tableDefaultCollation) + ("UTF8_BINARY", None), + ("UTF8_LCASE", None), + ("UNICODE", None), + ("DE", None), + ("UTF8_BINARY", Some("UTF8_BINARY")), + ("UTF8_BINARY", Some("UTF8_LCASE")), + ("UTF8_BINARY", Some("DE")), + ("UTF8_LCASE", Some("UTF8_BINARY")), + ("UTF8_LCASE", Some("UTF8_LCASE")), + ("UTF8_LCASE", Some("DE")) + ).foreach { + case (schemaDefaultCollation, tableDefaultCollation) => + test( + s"""CREATE table with schema level collation + | (schema default collation = $schemaDefaultCollation, + | table default collation = $tableDefaultCollation)""".stripMargin) { + testCreateTableWithSchemaLevelCollation( + schemaDefaultCollation, tableDefaultCollation) + } + + test( + s"""ALTER table with schema level collation + | (schema default collation = $schemaDefaultCollation, + | table default collation = $tableDefaultCollation)""".stripMargin) { + testAlterTableWithSchemaLevelCollation( + schemaDefaultCollation, tableDefaultCollation) + } + } + + Seq( + // (schemaOldCollation, schemaNewCollation) + (None, "UTF8_BINARY"), + (None, "UTF8_LCASE"), + (None, "DE"), + (Some("UTF8_BINARY"), "UTF8_BINARY"), + (Some("UTF8_BINARY"), "UTF8_LCASE"), + (Some("UTF8_BINARY"), "DE"), + (Some("UTF8_LCASE"), "UTF8_BINARY"), + (Some("UTF8_LCASE"), "UTF8_LCASE"), + (Some("UTF8_LCASE"), "DE") + ).foreach { + case (schemaOldCollation, schemaNewCollation) => + val schemaOldCollationDefaultClause = + if (schemaOldCollation.isDefined) { + s"DEFAULT COLLATION ${schemaOldCollation.get}" + } else { + "" + } + + test( + s"""ALTER schema default collation (old schema default collation = $schemaOldCollation, + | new schema default collation = $schemaNewCollation)""".stripMargin) { + withSchema(SESSION_CATALOG_NAME, "DEFAULT", testSchema) { + sql(s"CREATE SCHEMA $testSchema $schemaOldCollationDefaultClause") + sql(s"USE $testSchema") + + withTable(testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING, c2 STRING COLLATE SR_AI)") + val tableDefaultCollation = + if (schemaOldCollation.isDefined) { + schemaOldCollation.get + } else { + "UTF8_BINARY" + } + + // ALTER SCHEMA + sql(s"ALTER SCHEMA $testSchema SET DEFAULT COLLATION $schemaNewCollation") Review Comment: Should remove SET once parser is fixed ########## sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala: ########## @@ -143,6 +145,27 @@ case class AlterDatabasePropertiesCommand( } } +/** + * A command for users to set new default collation for a database. + * If the database does not exist, an error message will be issued to indicate the database + * does not exist. + * The syntax of using this command in SQL is: + * {{{ + * ALTER (DATABASE|SCHEMA) database_name SET DEFAULT COLLATION collation Review Comment: Please fix these method names and comments not to include SET ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala: ########## @@ -32,17 +33,25 @@ import org.apache.spark.sql.types.{DataType, StringType} */ object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - fetchDefaultCollation(plan) match { + val objectLevelDefaultCollation = fetchObjectLevelDefaultCollation(plan) + val schemaLevelDefaultCollation = fetchSchemaLevelDefaultCollation(plan) Review Comment: Not sure how expensive this is, but it seems that we don't have to call fetchSchemaLevelDefaultCollation unless objectLevelDefaultCollation is None, so we can optimize this. ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala: ########## @@ -4509,6 +4518,25 @@ class AstBuilder extends DataTypeAstBuilder } } + /** + * Create an [[SetNamespaceCollation]] logical plan. + * + * For example: + * {{{ + * ALTER (DATABASE|SCHEMA|NAMESPACE) namespace SET DEFAULT COLLATION collation; Review Comment: Should be without SET -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org