dejankrak-db commented on code in PR #50937:
URL: https://github.com/apache/spark/pull/50937#discussion_r2099022053


##########
sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4:
##########
@@ -191,11 +191,14 @@ statement
     | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference
         (commentSpec |
          locationSpec |
+         collationSpec |
          (WITH (DBPROPERTIES | PROPERTIES) propertyList))*             #createNamespace
     | ALTER namespace identifierReference
         SET (DBPROPERTIES | PROPERTIES) propertyList                   #setNamespaceProperties
     | ALTER namespace identifierReference
         UNSET (DBPROPERTIES | PROPERTIES) propertyList                 #unsetNamespaceProperties
+    | ALTER namespace identifierReference
+        SET collationSpec                                              #setNamespaceCollation

Review Comment:
   This should be without SET, per the reference spec, similar to the ALTER TABLE command:
   ALTER TABLE identifierReference collationSpec



##########
sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala:
##########
@@ -918,6 +918,20 @@ class QueryCompilationErrorsSuite
     }
   }
 
+  test("SPARK-52219: the schema level collations feature is unsupported") {
+    // TODO: when schema level collations are supported, change this test to set the flag to false
+    Seq(
+      "CREATE SCHEMA test_schema DEFAULT COLLATION UNICODE",
+      "ALTER SCHEMA test_schema SET DEFAULT COLLATION UNICODE"

Review Comment:
   Should remove SET once the parser is fixed.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala:
##########
@@ -646,6 +646,17 @@ case class SetNamespaceProperties(
     copy(namespace = newChild)
 }
 
+/**
+ * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET DEFAULT COLLATION command.

Review Comment:
   Extra SET keyword



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala:
##########
@@ -32,17 +33,25 @@ import org.apache.spark.sql.types.{DataType, StringType}
  */
 object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    fetchDefaultCollation(plan) match {
+    val objectLevelDefaultCollation = fetchObjectLevelDefaultCollation(plan)
+    val schemaLevelDefaultCollation = fetchSchemaLevelDefaultCollation(plan)

Review Comment:
   Hmm, I see that schemaLevelDefaultCollation is also used below in tryToInheritCollationIfMissing, where it is applied when the table collation is not specified. Still, that only happens in specific cases; consider whether fetching the schema collation can be deferred until it is actually required.



##########
sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala:
##########
@@ -129,6 +142,91 @@ abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSessi
     }
   }
 
+  Seq(
+    // (schemaDefaultCollation, tableDefaultCollation)
+    ("UTF8_BINARY", None),
+    ("UTF8_LCASE", None),
+    ("UNICODE", None),
+    ("DE", None),
+    ("UTF8_BINARY", Some("UTF8_BINARY")),
+    ("UTF8_BINARY", Some("UTF8_LCASE")),
+    ("UTF8_BINARY", Some("DE")),
+    ("UTF8_LCASE", Some("UTF8_BINARY")),
+    ("UTF8_LCASE", Some("UTF8_LCASE")),
+    ("UTF8_LCASE", Some("DE"))
+  ).foreach {
+    case (schemaDefaultCollation, tableDefaultCollation) =>
+      test(
+        s"""CREATE table with schema level collation
+          | (schema default collation = $schemaDefaultCollation,
+          | table default collation = $tableDefaultCollation)""".stripMargin) {
+        testCreateTableWithSchemaLevelCollation(
+          schemaDefaultCollation, tableDefaultCollation)
+      }
+
+      test(
+        s"""ALTER table with schema level collation
+          | (schema default collation = $schemaDefaultCollation,
+          | table default collation = $tableDefaultCollation)""".stripMargin) {
+        testAlterTableWithSchemaLevelCollation(
+         schemaDefaultCollation, tableDefaultCollation)
+      }
+  }
+
+  Seq(
+    // (schemaOldCollation, schemaNewCollation)
+    (None, "UTF8_BINARY"),
+    (None, "UTF8_LCASE"),
+    (None, "DE"),
+    (Some("UTF8_BINARY"), "UTF8_BINARY"),
+    (Some("UTF8_BINARY"), "UTF8_LCASE"),
+    (Some("UTF8_BINARY"), "DE"),
+    (Some("UTF8_LCASE"), "UTF8_BINARY"),
+    (Some("UTF8_LCASE"), "UTF8_LCASE"),
+    (Some("UTF8_LCASE"), "DE")
+  ).foreach {
+    case (schemaOldCollation, schemaNewCollation) =>
+      val schemaOldCollationDefaultClause =
+        if (schemaOldCollation.isDefined) {
+          s"DEFAULT COLLATION ${schemaOldCollation.get}"
+        } else {
+          ""
+        }
+
+      test(
+        s"""ALTER schema default collation (old schema default collation = 
$schemaOldCollation,
+          | new schema default collation = 
$schemaNewCollation)""".stripMargin) {
+        withSchema(SESSION_CATALOG_NAME, "DEFAULT", testSchema) {
+          sql(s"CREATE SCHEMA $testSchema $schemaOldCollationDefaultClause")
+          sql(s"USE $testSchema")
+
+          withTable(testTable) {
+            sql(s"CREATE TABLE $testTable (c1 STRING, c2 STRING COLLATE 
SR_AI)")
+            val tableDefaultCollation =
+              if (schemaOldCollation.isDefined) {
+                schemaOldCollation.get
+              } else {
+                "UTF8_BINARY"
+              }
+
+            // ALTER SCHEMA
+            sql(s"ALTER SCHEMA $testSchema SET DEFAULT COLLATION 
$schemaNewCollation")

Review Comment:
   Should remove SET once the parser is fixed.



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala:
##########
@@ -143,6 +145,27 @@ case class AlterDatabasePropertiesCommand(
   }
 }
 
+/**
+ * A command for users to set new default collation for a database.
+ * If the database does not exist, an error message will be issued to indicate the database
+ * does not exist.
+ * The syntax of using this command in SQL is:
+ * {{{
+ *    ALTER (DATABASE|SCHEMA) database_name SET DEFAULT COLLATION collation

Review Comment:
   Please fix these method names and comments so they do not include SET.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala:
##########
@@ -32,17 +33,25 @@ import org.apache.spark.sql.types.{DataType, StringType}
  */
 object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    fetchDefaultCollation(plan) match {
+    val objectLevelDefaultCollation = fetchObjectLevelDefaultCollation(plan)
+    val schemaLevelDefaultCollation = fetchSchemaLevelDefaultCollation(plan)

Review Comment:
   Not sure how expensive this is, but it seems that we don't have to call fetchSchemaLevelDefaultCollation unless objectLevelDefaultCollation is None, so we can optimize this.
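   For what it's worth, `Option.orElse` evaluates its argument by name, so the short-circuit could be as small as the sketch below (plain Scala with stand-in helpers, not the PR's actual methods):

   ```scala
   object OrElseShortCircuitSketch {
     def fetchObjectLevelDefaultCollation(): Option[String] = Some("UNICODE") // stand-in

     def fetchSchemaLevelDefaultCollation(): Option[String] = {
       println("schema-level catalog lookup") // stand-in for the more expensive call
       Some("UTF8_LCASE")
     }

     def main(args: Array[String]): Unit = {
       // orElse takes its argument by name, so the schema lookup below is never
       // evaluated when the object-level collation is already defined.
       val collation = fetchObjectLevelDefaultCollation()
         .orElse(fetchSchemaLevelDefaultCollation())
       println(collation) // Some(UNICODE), and nothing was fetched from the catalog
     }
   }
   ```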



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:
##########
@@ -4509,6 +4518,25 @@ class AstBuilder extends DataTypeAstBuilder
     }
   }
 
+  /**
+   * Create a [[SetNamespaceCollation]] logical plan.
+   *
+   * For example:
+   * {{{
+   *   ALTER (DATABASE|SCHEMA|NAMESPACE) namespace SET DEFAULT COLLATION collation;

Review Comment:
   Should be without SET



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

