This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch 
gh-readonly-queue/main/pr-2182-5e5c16c597ed6dd1d6539d4c0c701972b166921d
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git

commit 0c19e088bf621397458102fde83cbf1020925be5
Author: Michael Victor Zink <[email protected]>
AuthorDate: Thu Feb 5 08:02:40 2026 -0800

    MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE (#2182)
---
 src/ast/helpers/stmt_create_database.rs | 28 ++++++++++
 src/ast/mod.rs                          | 14 +++++
 src/parser/mod.rs                       | 30 +++++++++++
 tests/sqlparser_mysql.rs                | 93 +++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+)

diff --git a/src/ast/helpers/stmt_create_database.rs 
b/src/ast/helpers/stmt_create_database.rs
index c718dbce..e524228d 100644
--- a/src/ast/helpers/stmt_create_database.rs
+++ b/src/ast/helpers/stmt_create_database.rs
@@ -85,6 +85,14 @@ pub struct CreateDatabaseBuilder {
     pub storage_serialization_policy: Option<StorageSerializationPolicy>,
     /// Optional comment attached to the database.
     pub comment: Option<String>,
+    /// Optional default character set (MySQL).
+    ///
+    /// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+    pub default_charset: Option<String>,
+    /// Optional default collation (MySQL).
+    ///
+    /// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+    pub default_collation: Option<String>,
     /// Optional catalog sync configuration.
     pub catalog_sync: Option<String>,
     /// Optional catalog sync namespace mode.
@@ -120,6 +128,8 @@ impl CreateDatabaseBuilder {
             default_ddl_collation: None,
             storage_serialization_policy: None,
             comment: None,
+            default_charset: None,
+            default_collation: None,
             catalog_sync: None,
             catalog_sync_namespace_mode: None,
             catalog_sync_namespace_flatten_delimiter: None,
@@ -218,6 +228,18 @@ impl CreateDatabaseBuilder {
         self
     }
 
+    /// Set the default character set (MySQL `DEFAULT CHARACTER SET`); `None` clears it.
+    pub fn default_charset(mut self, default_charset: Option<String>) -> Self {
+        self.default_charset = default_charset;
+        self
+    }
+
+    /// Set the default collation (MySQL `DEFAULT COLLATE`); `None` clears it.
+    pub fn default_collation(mut self, default_collation: Option<String>) -> 
Self {
+        self.default_collation = default_collation;
+        self
+    }
+
     /// Set the catalog sync for the database.
     pub fn catalog_sync(mut self, catalog_sync: Option<String>) -> Self {
         self.catalog_sync = catalog_sync;
@@ -272,6 +294,8 @@ impl CreateDatabaseBuilder {
             default_ddl_collation: self.default_ddl_collation,
             storage_serialization_policy: self.storage_serialization_policy,
             comment: self.comment,
+            default_charset: self.default_charset,
+            default_collation: self.default_collation,
             catalog_sync: self.catalog_sync,
             catalog_sync_namespace_mode: self.catalog_sync_namespace_mode,
             catalog_sync_namespace_flatten_delimiter: 
self.catalog_sync_namespace_flatten_delimiter,
@@ -302,6 +326,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
                 default_ddl_collation,
                 storage_serialization_policy,
                 comment,
+                default_charset,
+                default_collation,
                 catalog_sync,
                 catalog_sync_namespace_mode,
                 catalog_sync_namespace_flatten_delimiter,
@@ -323,6 +349,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
                 default_ddl_collation,
                 storage_serialization_policy,
                 comment,
+                default_charset,
+                default_collation,
                 catalog_sync,
                 catalog_sync_namespace_mode,
                 catalog_sync_namespace_flatten_delimiter,
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 1e626916..a26c14ef 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -4227,6 +4227,10 @@ pub enum Statement {
         storage_serialization_policy: Option<StorageSerializationPolicy>,
         /// Optional comment.
         comment: Option<String>,
+        /// Optional default character set (MySQL).
+        default_charset: Option<String>,
+        /// Optional default collation (MySQL).
+        default_collation: Option<String>,
         /// Optional catalog sync identifier.
         catalog_sync: Option<String>,
         /// Catalog sync namespace mode.
@@ -5081,6 +5085,8 @@ impl fmt::Display for Statement {
                 default_ddl_collation,
                 storage_serialization_policy,
                 comment,
+                default_charset,
+                default_collation,
                 catalog_sync,
                 catalog_sync_namespace_mode,
                 catalog_sync_namespace_flatten_delimiter,
@@ -5140,6 +5146,14 @@ impl fmt::Display for Statement {
                     write!(f, " COMMENT = '{comment}'")?;
                 }
 
+                if let Some(charset) = default_charset {
+                    write!(f, " DEFAULT CHARACTER SET {charset}")?;
+                }
+
+                if let Some(collation) = default_collation {
+                    write!(f, " DEFAULT COLLATE {collation}")?;
+                }
+
                 if let Some(sync) = catalog_sync {
                     write!(f, " CATALOG_SYNC = '{sync}'")?;
                 }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index d9a5c518..bddafd1e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -5341,6 +5341,34 @@ impl<'a> Parser<'a> {
             None
         };
 
+        // Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options, in any order.
+        //
+        // Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported. MySQL
+        // accepts only one character set (a second one is an error), so the `is_none()` guard
+        // covers both spellings; repeated collations are accepted and the last one wins.
+        // <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+        let mut default_charset = None;
+        let mut default_collation = None;
+        loop {
+            let has_default = self.parse_keyword(Keyword::DEFAULT);
+            if default_charset.is_none()
+                && (self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET])
+                    || self.parse_keyword(Keyword::CHARSET))
+            {
+                let _ = self.consume_token(&Token::Eq); // `=` is optional
+                default_charset = Some(self.parse_identifier()?.value);
+            } else if self.parse_keyword(Keyword::COLLATE) {
+                let _ = self.consume_token(&Token::Eq); // `=` is optional
+                default_collation = Some(self.parse_identifier()?.value);
+            } else if has_default {
+                // DEFAULT not followed by an option accepted here: step back so it is not swallowed
+                self.prev_token();
+                break;
+            } else {
+                break;
+            }
+        }
+
         Ok(Statement::CreateDatabase {
             db_name,
             if_not_exists: ine,
@@ -5357,6 +5385,8 @@ impl<'a> Parser<'a> {
             default_ddl_collation: None,
             storage_serialization_policy: None,
             comment: None,
+            default_charset,
+            default_collation,
             catalog_sync: None,
             catalog_sync_namespace_mode: None,
             catalog_sync_namespace_flatten_delimiter: None,
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index b719f2ef..d1e718f4 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -4621,3 +4621,96 @@ fn test_optimizer_hints() {
        DELETE /*+ foobar */ FROM table_name",
     );
 }
+
+#[test]
+fn parse_create_database_with_charset() {
+    // Test DEFAULT CHARACTER SET without = sign (normalized form)
+    mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER 
SET utf8mb4");
+
+    // Test DEFAULT CHARACTER SET with = sign
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+    );
+
+    // Test CHARACTER SET without DEFAULT
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb CHARACTER SET utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+    );
+
+    // Test CHARSET shorthand
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb CHARSET utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+    );
+
+    // Test DEFAULT CHARSET shorthand
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb DEFAULT CHARSET utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+    );
+
+    // Test DEFAULT COLLATE
+    mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE 
utf8mb4_unicode_ci");
+
+    // Test COLLATE without DEFAULT
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci",
+        "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci",
+    );
+
+    // Test both CHARACTER SET and COLLATE together
+    mysql_and_generic().verified_stmt(
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE 
utf8mb4_unicode_ci",
+    );
+
+    // Test IF NOT EXISTS with CHARACTER SET
+    mysql_and_generic()
+        .verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER 
SET utf16");
+
+    // Test the exact syntax from the issue
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16",
+        "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16",
+    );
+}
+
+#[test]
+fn parse_create_database_with_charset_errors() {
+    // Missing charset name after DEFAULT CHARACTER SET
+    assert!(mysql_and_generic()
+        .parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET")
+        .is_err());
+
+    // Missing charset name after CHARSET
+    assert!(mysql_and_generic()
+        .parse_sql_statements("CREATE DATABASE mydb CHARSET")
+        .is_err());
+
+    // Missing collation name after DEFAULT COLLATE
+    assert!(mysql_and_generic()
+        .parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE")
+        .is_err());
+
+    // Equals sign present but no charset name after it
+    assert!(mysql_and_generic()
+        .parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =")
+        .is_err());
+}
+
+#[test]
+fn parse_create_database_with_charset_option_ordering() {
+    // MySQL allows COLLATE before CHARACTER SET - output is normalized to 
CHARACTER SET first
+    // (matches MySQL's own SHOW CREATE DATABASE output order)
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT 
CHARACTER SET utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE 
utf8mb4_unicode_ci",
+    );
+
+    // COLLATE first without DEFAULT keywords: output gains DEFAULT and puts CHARACTER SET first
+    mysql_and_generic().one_statement_parses_to(
+        "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET 
utf8mb4",
+        "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE 
utf8mb4_unicode_ci",
+    );
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to