This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0c19e088 MySQL: Add support for DEFAULT CHARACTER SET in CREATE
DATABASE (#2182)
0c19e088 is described below
commit 0c19e088bf621397458102fde83cbf1020925be5
Author: Michael Victor Zink <[email protected]>
AuthorDate: Thu Feb 5 08:02:40 2026 -0800
MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE (#2182)
---
src/ast/helpers/stmt_create_database.rs | 28 ++++++++++
src/ast/mod.rs | 14 +++++
src/parser/mod.rs | 30 +++++++++++
tests/sqlparser_mysql.rs | 93 +++++++++++++++++++++++++++++++++
4 files changed, 165 insertions(+)
diff --git a/src/ast/helpers/stmt_create_database.rs b/src/ast/helpers/stmt_create_database.rs
index c718dbce..e524228d 100644
--- a/src/ast/helpers/stmt_create_database.rs
+++ b/src/ast/helpers/stmt_create_database.rs
@@ -85,6 +85,14 @@ pub struct CreateDatabaseBuilder {
pub storage_serialization_policy: Option<StorageSerializationPolicy>,
/// Optional comment attached to the database.
pub comment: Option<String>,
+ /// Optional default character set (MySQL).
+ ///
+ /// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+ pub default_charset: Option<String>,
+ /// Optional default collation (MySQL).
+ ///
+ /// <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+ pub default_collation: Option<String>,
/// Optional catalog sync configuration.
pub catalog_sync: Option<String>,
/// Optional catalog sync namespace mode.
@@ -120,6 +128,8 @@ impl CreateDatabaseBuilder {
default_ddl_collation: None,
storage_serialization_policy: None,
comment: None,
+ default_charset: None,
+ default_collation: None,
catalog_sync: None,
catalog_sync_namespace_mode: None,
catalog_sync_namespace_flatten_delimiter: None,
@@ -218,6 +228,18 @@ impl CreateDatabaseBuilder {
self
}
+ /// Set the default character set for the database.
+ pub fn default_charset(mut self, default_charset: Option<String>) -> Self {
+ self.default_charset = default_charset;
+ self
+ }
+
+ /// Set the default collation for the database.
+ pub fn default_collation(mut self, default_collation: Option<String>) ->
Self {
+ self.default_collation = default_collation;
+ self
+ }
+
/// Set the catalog sync for the database.
pub fn catalog_sync(mut self, catalog_sync: Option<String>) -> Self {
self.catalog_sync = catalog_sync;
@@ -272,6 +294,8 @@ impl CreateDatabaseBuilder {
default_ddl_collation: self.default_ddl_collation,
storage_serialization_policy: self.storage_serialization_policy,
comment: self.comment,
+ default_charset: self.default_charset,
+ default_collation: self.default_collation,
catalog_sync: self.catalog_sync,
catalog_sync_namespace_mode: self.catalog_sync_namespace_mode,
catalog_sync_namespace_flatten_delimiter:
self.catalog_sync_namespace_flatten_delimiter,
@@ -302,6 +326,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
default_ddl_collation,
storage_serialization_policy,
comment,
+ default_charset,
+ default_collation,
catalog_sync,
catalog_sync_namespace_mode,
catalog_sync_namespace_flatten_delimiter,
@@ -323,6 +349,8 @@ impl TryFrom<Statement> for CreateDatabaseBuilder {
default_ddl_collation,
storage_serialization_policy,
comment,
+ default_charset,
+ default_collation,
catalog_sync,
catalog_sync_namespace_mode,
catalog_sync_namespace_flatten_delimiter,
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 1e626916..a26c14ef 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -4227,6 +4227,10 @@ pub enum Statement {
storage_serialization_policy: Option<StorageSerializationPolicy>,
/// Optional comment.
comment: Option<String>,
+ /// Optional default character set (MySQL).
+ default_charset: Option<String>,
+ /// Optional default collation (MySQL).
+ default_collation: Option<String>,
/// Optional catalog sync identifier.
catalog_sync: Option<String>,
/// Catalog sync namespace mode.
@@ -5081,6 +5085,8 @@ impl fmt::Display for Statement {
default_ddl_collation,
storage_serialization_policy,
comment,
+ default_charset,
+ default_collation,
catalog_sync,
catalog_sync_namespace_mode,
catalog_sync_namespace_flatten_delimiter,
@@ -5140,6 +5146,14 @@ impl fmt::Display for Statement {
write!(f, " COMMENT = '{comment}'")?;
}
+ if let Some(charset) = default_charset {
+ write!(f, " DEFAULT CHARACTER SET {charset}")?;
+ }
+
+ if let Some(collation) = default_collation {
+ write!(f, " DEFAULT COLLATE {collation}")?;
+ }
+
if let Some(sync) = catalog_sync {
write!(f, " CATALOG_SYNC = '{sync}'")?;
}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index d9a5c518..bddafd1e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -5341,6 +5341,34 @@ impl<'a> Parser<'a> {
None
};
+ // Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options
+ //
+ // Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported.
+ // Furthermore, MySQL will only accept one character set, raising an error if there is more
+ // than one, but will accept multiple collations and use the last one.
+ //
+ // <https://dev.mysql.com/doc/refman/8.4/en/create-database.html>
+ let mut default_charset = None;
+ let mut default_collation = None;
+ loop {
+ let has_default = self.parse_keyword(Keyword::DEFAULT);
+ if default_charset.is_none() &&
self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET])
+ || self.parse_keyword(Keyword::CHARSET)
+ {
+ let _ = self.consume_token(&Token::Eq);
+ default_charset = Some(self.parse_identifier()?.value);
+ } else if self.parse_keyword(Keyword::COLLATE) {
+ let _ = self.consume_token(&Token::Eq);
+ default_collation = Some(self.parse_identifier()?.value);
+ } else if has_default {
+ // DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE
+ self.prev_token();
+ break;
+ } else {
+ break;
+ }
+ }
+
Ok(Statement::CreateDatabase {
db_name,
if_not_exists: ine,
@@ -5357,6 +5385,8 @@ impl<'a> Parser<'a> {
default_ddl_collation: None,
storage_serialization_policy: None,
comment: None,
+ default_charset,
+ default_collation,
catalog_sync: None,
catalog_sync_namespace_mode: None,
catalog_sync_namespace_flatten_delimiter: None,
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index b719f2ef..d1e718f4 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -4621,3 +4621,96 @@ fn test_optimizer_hints() {
DELETE /*+ foobar */ FROM table_name",
);
}
+
+#[test]
+fn parse_create_database_with_charset() {
+ // Test DEFAULT CHARACTER SET without = sign (normalized form)
+ mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER
SET utf8mb4");
+
+ // Test DEFAULT CHARACTER SET with = sign (parses to the normalized form without it)
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+ );
+
+ // Test CHARACTER SET without DEFAULT
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb CHARACTER SET utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+ );
+
+ // Test CHARSET shorthand
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb CHARSET utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+ );
+
+ // Test DEFAULT CHARSET shorthand
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb DEFAULT CHARSET utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4",
+ );
+
+ // Test DEFAULT COLLATE
+ mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE
utf8mb4_unicode_ci");
+
+ // Test COLLATE without DEFAULT
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci",
+ "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci",
+ );
+
+ // Test both CHARACTER SET and COLLATE together
+ mysql_and_generic().verified_stmt(
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE
utf8mb4_unicode_ci",
+ );
+
+ // Test IF NOT EXISTS with CHARACTER SET
+ mysql_and_generic()
+ .verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER
SET utf16");
+
+ // Test the exact syntax from the issue
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16",
+ "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16",
+ );
+}
+
+#[test]
+fn parse_create_database_with_charset_errors() {
+ // Missing charset name after CHARACTER SET
+ assert!(mysql_and_generic()
+ .parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET")
+ .is_err());
+
+ // Missing charset name after CHARSET
+ assert!(mysql_and_generic()
+ .parse_sql_statements("CREATE DATABASE mydb CHARSET")
+ .is_err());
+
+ // Missing collation name after COLLATE
+ assert!(mysql_and_generic()
+ .parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE")
+ .is_err());
+
+ // Equals sign but no value
+ assert!(mysql_and_generic()
+ .parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =")
+ .is_err());
+}
+
+#[test]
+fn parse_create_database_with_charset_option_ordering() {
+ // MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET first
+ // (matches MySQL's own SHOW CREATE DATABASE output order)
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT
CHARACTER SET utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE
utf8mb4_unicode_ci",
+ );
+
+ // COLLATE first without DEFAULT keywords
+ mysql_and_generic().one_statement_parses_to(
+ "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET
utf8mb4",
+ "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE
utf8mb4_unicode_ci",
+ );
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]