On 2022-09-15 09:52, Kyotaro Horiguchi wrote:
If I executed initdb as follows, I would be told to specify
--icu-locale option.

$ initdb --encoding sql-ascii --locale-provider icu hoge
...
initdb: error: ICU locale must be specified

However, when I reran the command, it complains about incompatible
encoding this time.  I think it's more user-friendly to check for the
encoding compatibility before the check for missing --icu-locale
option.

regards.

I agree with you. Here's another version of the patch. The locale/encoding checks and reports in initdb have been reordered, because now the encoding is set first and only then the ICU locale is checked.

P.S. While working on the patch, I discovered that UTF8 encoding is always used for the ICU provider in initdb unless it is explicitly specified by the user:

if (!encoding && locale_provider == COLLPROVIDER_ICU)
        encodingid = PG_UTF8;

IMO this creates additional errors for locales with other encodings:

$ initdb --locale de_DE.iso885915@euro --locale-provider icu --icu-locale de-DE
...
initdb: error: encoding mismatch
initdb: detail: The encoding you selected (UTF8) and the encoding that the selected locale uses (LATIN9) do not match. This would lead to misbehavior in various character string processing functions. initdb: hint: Rerun initdb and either do not specify an encoding explicitly, or choose a matching combination.

And ICU supports many encodings, see the contents of pg_enc2icu_tbl in encnames.c...

--
Marina Polyakova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 6ff48bb18f3639ae45d9528b32df51a4aebc60c0..f248ad42b77c8c0cf2089963d4357b120914ce20 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1034,6 +1034,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	if (dblocprovider == COLLPROVIDER_ICU)
 	{
+		if (!(is_encoding_supported_by_icu(encoding)))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("encoding \"%s\" is not supported with ICU provider",
+							pg_encoding_to_char(encoding))));
+
 		/*
 		 * This would happen if template0 uses the libc provider but the new
 		 * database uses icu.
@@ -1042,10 +1048,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("ICU locale must be specified")));
-	}
 
-	if (dblocprovider == COLLPROVIDER_ICU)
 		check_icu_locale(dbiculocale);
+	}
 
 	/*
 	 * Check that the new encoding and locale settings match the source
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 6aeec8d426c52414b827686781c245291f27ed1f..999e7c2bf8dc707063eddf19a5c27eed03d3ca09 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2092,20 +2092,30 @@ setlocales(void)
 	check_locale_name(LC_CTYPE, lc_messages, &canonname);
 	lc_messages = canonname;
 #endif
+}
 
-	if (locale_provider == COLLPROVIDER_ICU)
+static void
+check_icu_locale_encoding(void)
+{
+	if (!(is_encoding_supported_by_icu(encodingid)))
 	{
-		if (!icu_locale)
-			pg_fatal("ICU locale must be specified");
+		pg_log_error("encoding \"%s\" is not supported with ICU provider",
+					 pg_encoding_to_char(encodingid));
+		pg_log_error_hint("Rerun %s and choose a matching combination.",
+						  progname);
+		exit(1);
+	}
 
-		/*
-		 * In supported builds, the ICU locale ID will be checked by the
-		 * backend during post-bootstrap initialization.
-		 */
+	if (!icu_locale)
+		pg_fatal("ICU locale must be specified");
+
+	/*
+	 * In supported builds, the ICU locale ID will be checked by the backend
+	 * during post-bootstrap initialization.
+	 */
 #ifndef USE_ICU
-		pg_fatal("ICU is not supported in this build");
+	pg_fatal("ICU is not supported in this build");
 #endif
-	}
 }
 
 /*
@@ -2281,34 +2291,6 @@ setup_locale_encoding(void)
 {
 	setlocales();
 
-	if (locale_provider == COLLPROVIDER_LIBC &&
-		strcmp(lc_ctype, lc_collate) == 0 &&
-		strcmp(lc_ctype, lc_time) == 0 &&
-		strcmp(lc_ctype, lc_numeric) == 0 &&
-		strcmp(lc_ctype, lc_monetary) == 0 &&
-		strcmp(lc_ctype, lc_messages) == 0 &&
-		(!icu_locale || strcmp(lc_ctype, icu_locale) == 0))
-		printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype);
-	else
-	{
-		printf(_("The database cluster will be initialized with this locale configuration:\n"));
-		printf(_("  provider:    %s\n"), collprovider_name(locale_provider));
-		if (icu_locale)
-			printf(_("  ICU locale:  %s\n"), icu_locale);
-		printf(_("  LC_COLLATE:  %s\n"
-				 "  LC_CTYPE:    %s\n"
-				 "  LC_MESSAGES: %s\n"
-				 "  LC_MONETARY: %s\n"
-				 "  LC_NUMERIC:  %s\n"
-				 "  LC_TIME:     %s\n"),
-			   lc_collate,
-			   lc_ctype,
-			   lc_messages,
-			   lc_monetary,
-			   lc_numeric,
-			   lc_time);
-	}
-
 	if (!encoding && locale_provider == COLLPROVIDER_ICU)
 		encodingid = PG_UTF8;
 	else if (!encoding)
@@ -2350,11 +2332,7 @@ setup_locale_encoding(void)
 #endif
 		}
 		else
-		{
 			encodingid = ctype_enc;
-			printf(_("The default database encoding has accordingly been set to \"%s\".\n"),
-				   pg_encoding_to_char(encodingid));
-		}
 	}
 	else
 		encodingid = get_encoding_id(encoding);
@@ -2362,6 +2340,39 @@ setup_locale_encoding(void)
 	if (!check_locale_encoding(lc_ctype, encodingid) ||
 		!check_locale_encoding(lc_collate, encodingid))
 		exit(1);				/* check_locale_encoding printed the error */
+
+	if (locale_provider == COLLPROVIDER_ICU)
+		check_icu_locale_encoding();
+
+	if (locale_provider == COLLPROVIDER_LIBC &&
+		strcmp(lc_ctype, lc_collate) == 0 &&
+		strcmp(lc_ctype, lc_time) == 0 &&
+		strcmp(lc_ctype, lc_numeric) == 0 &&
+		strcmp(lc_ctype, lc_monetary) == 0 &&
+		strcmp(lc_ctype, lc_messages) == 0)
+		printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype);
+	else
+	{
+		printf(_("The database cluster will be initialized with this locale configuration:\n"));
+		printf(_("  provider:    %s\n"), collprovider_name(locale_provider));
+		if (icu_locale)
+			printf(_("  ICU locale:  %s\n"), icu_locale);
+		printf(_("  LC_COLLATE:  %s\n"
+				 "  LC_CTYPE:    %s\n"
+				 "  LC_MESSAGES: %s\n"
+				 "  LC_MONETARY: %s\n"
+				 "  LC_NUMERIC:  %s\n"
+				 "  LC_TIME:     %s\n"),
+			   lc_collate,
+			   lc_ctype,
+			   lc_messages,
+			   lc_monetary,
+			   lc_numeric,
+			   lc_time);
+	}
+
+	printf(_("The default database encoding will be set to \"%s\".\n"),
+		   pg_encoding_to_char(encodingid));
 }
 
 
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index a37f6dd9b334b6ee22d9fdd4d51422795cb54a39..40d6d6305a858ec01587d184c69d4f28d0796d74 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -118,6 +118,15 @@ if ($ENV{with_icu} eq 'yes')
 		],
 		qr/FATAL:  could not open collator for locale/,
 		'fails for invalid ICU locale');
+
+	command_fails_like(
+		[
+			'initdb',                '--no-sync',
+			'--locale-provider=icu', '--encoding=SQL_ASCII',
+			"$tempdir/dataX"
+		],
+		qr/error: encoding "SQL_ASCII" is not supported with ICU provider/,
+		'encoding "SQL_ASCII" is not supported with ICU provider');
 }
 else
 {
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index e91c1d013d08d8bd1e3a92f2aba958c5c7713ca6..56c578d3d81b8e61b687a8b508b3b741d4df270a 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -50,6 +50,15 @@ if ($ENV{with_icu} eq 'yes')
 		],
 		'fails for invalid ICU locale');
 
+	$node->command_fails_like(
+		[
+			'createdb',             '-T',
+			'template0',            '--locale-provider=icu',
+			'--encoding=SQL_ASCII', 'foobarX'
+		],
+		qr/ERROR:  encoding "SQL_ASCII" is not supported with ICU provider/,
+		'encoding "SQL_ASCII" is not supported with ICU provider');
+
 	# additional node, which uses the icu provider
 	my $node2 = PostgreSQL::Test::Cluster->new('icu');
 	$node2->init(extra => ['--locale-provider=icu', '--icu-locale=en']);

Reply via email to