On 2022-09-15 09:52, Kyotaro Horiguchi wrote:
If I executed initdb as follows, I would be told to specify
--icu-locale option.
$ initdb --encoding sql-ascii --locale-provider icu hoge
...
initdb: error: ICU locale must be specified
However, when I reran the command, it complains about incompatible
encoding this time. I think it's more user-friendly to check for the
encoding compatibility before the check for missing --icu-locale
option.
regards.
I agree with you. Here's another version of the patch. The
locale/encoding checks and reports in initdb have been reordered,
because now the encoding is set first and only then the ICU locale is
checked.
P.S. While working on the patch, I discovered that UTF8 encoding is
always used for the ICU provider in initdb unless it is explicitly
specified by the user:
if (!encoding && locale_provider == COLLPROVIDER_ICU)
encodingid = PG_UTF8;
IMO this creates additional errors for locales with other encodings:
$ initdb --locale de_DE.iso885915@euro --locale-provider icu
--icu-locale de-DE
...
initdb: error: encoding mismatch
initdb: detail: The encoding you selected (UTF8) and the encoding that
the selected locale uses (LATIN9) do not match. This would lead to
misbehavior in various character string processing functions.
initdb: hint: Rerun initdb and either do not specify an encoding
explicitly, or choose a matching combination.
And ICU supports many encodings, see the contents of pg_enc2icu_tbl in
encnames.c...
--
Marina Polyakova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 6ff48bb18f3639ae45d9528b32df51a4aebc60c0..f248ad42b77c8c0cf2089963d4357b120914ce20 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1034,6 +1034,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (dblocprovider == COLLPROVIDER_ICU)
{
+ if (!(is_encoding_supported_by_icu(encoding)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("encoding \"%s\" is not supported with ICU provider",
+ pg_encoding_to_char(encoding))));
+
/*
* This would happen if template0 uses the libc provider but the new
* database uses icu.
@@ -1042,10 +1048,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ICU locale must be specified")));
- }
- if (dblocprovider == COLLPROVIDER_ICU)
check_icu_locale(dbiculocale);
+ }
/*
* Check that the new encoding and locale settings match the source
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 6aeec8d426c52414b827686781c245291f27ed1f..999e7c2bf8dc707063eddf19a5c27eed03d3ca09 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2092,20 +2092,30 @@ setlocales(void)
check_locale_name(LC_CTYPE, lc_messages, &canonname);
lc_messages = canonname;
#endif
+}
- if (locale_provider == COLLPROVIDER_ICU)
+static void
+check_icu_locale_encoding(void)
+{
+ if (!(is_encoding_supported_by_icu(encodingid)))
{
- if (!icu_locale)
- pg_fatal("ICU locale must be specified");
+ pg_log_error("encoding \"%s\" is not supported with ICU provider",
+ pg_encoding_to_char(encodingid));
+ pg_log_error_hint("Rerun %s and choose a matching combination.",
+ progname);
+ exit(1);
+ }
- /*
- * In supported builds, the ICU locale ID will be checked by the
- * backend during post-bootstrap initialization.
- */
+ if (!icu_locale)
+ pg_fatal("ICU locale must be specified");
+
+ /*
+ * In supported builds, the ICU locale ID will be checked by the backend
+ * during post-bootstrap initialization.
+ */
#ifndef USE_ICU
- pg_fatal("ICU is not supported in this build");
+ pg_fatal("ICU is not supported in this build");
#endif
- }
}
/*
@@ -2281,34 +2291,6 @@ setup_locale_encoding(void)
{
setlocales();
- if (locale_provider == COLLPROVIDER_LIBC &&
- strcmp(lc_ctype, lc_collate) == 0 &&
- strcmp(lc_ctype, lc_time) == 0 &&
- strcmp(lc_ctype, lc_numeric) == 0 &&
- strcmp(lc_ctype, lc_monetary) == 0 &&
- strcmp(lc_ctype, lc_messages) == 0 &&
- (!icu_locale || strcmp(lc_ctype, icu_locale) == 0))
- printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype);
- else
- {
- printf(_("The database cluster will be initialized with this locale configuration:\n"));
- printf(_(" provider: %s\n"), collprovider_name(locale_provider));
- if (icu_locale)
- printf(_(" ICU locale: %s\n"), icu_locale);
- printf(_(" LC_COLLATE: %s\n"
- " LC_CTYPE: %s\n"
- " LC_MESSAGES: %s\n"
- " LC_MONETARY: %s\n"
- " LC_NUMERIC: %s\n"
- " LC_TIME: %s\n"),
- lc_collate,
- lc_ctype,
- lc_messages,
- lc_monetary,
- lc_numeric,
- lc_time);
- }
-
if (!encoding && locale_provider == COLLPROVIDER_ICU)
encodingid = PG_UTF8;
else if (!encoding)
@@ -2350,11 +2332,7 @@ setup_locale_encoding(void)
#endif
}
else
- {
encodingid = ctype_enc;
- printf(_("The default database encoding has accordingly been set to \"%s\".\n"),
- pg_encoding_to_char(encodingid));
- }
}
else
encodingid = get_encoding_id(encoding);
@@ -2362,6 +2340,39 @@ setup_locale_encoding(void)
if (!check_locale_encoding(lc_ctype, encodingid) ||
!check_locale_encoding(lc_collate, encodingid))
exit(1); /* check_locale_encoding printed the error */
+
+ if (locale_provider == COLLPROVIDER_ICU)
+ check_icu_locale_encoding();
+
+ if (locale_provider == COLLPROVIDER_LIBC &&
+ strcmp(lc_ctype, lc_collate) == 0 &&
+ strcmp(lc_ctype, lc_time) == 0 &&
+ strcmp(lc_ctype, lc_numeric) == 0 &&
+ strcmp(lc_ctype, lc_monetary) == 0 &&
+ strcmp(lc_ctype, lc_messages) == 0)
+ printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype);
+ else
+ {
+ printf(_("The database cluster will be initialized with this locale configuration:\n"));
+ printf(_(" provider: %s\n"), collprovider_name(locale_provider));
+ if (icu_locale)
+ printf(_(" ICU locale: %s\n"), icu_locale);
+ printf(_(" LC_COLLATE: %s\n"
+ " LC_CTYPE: %s\n"
+ " LC_MESSAGES: %s\n"
+ " LC_MONETARY: %s\n"
+ " LC_NUMERIC: %s\n"
+ " LC_TIME: %s\n"),
+ lc_collate,
+ lc_ctype,
+ lc_messages,
+ lc_monetary,
+ lc_numeric,
+ lc_time);
+ }
+
+ printf(_("The default database encoding will be set to \"%s\".\n"),
+ pg_encoding_to_char(encodingid));
}
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index a37f6dd9b334b6ee22d9fdd4d51422795cb54a39..40d6d6305a858ec01587d184c69d4f28d0796d74 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -118,6 +118,15 @@ if ($ENV{with_icu} eq 'yes')
],
qr/FATAL: could not open collator for locale/,
'fails for invalid ICU locale');
+
+ command_fails_like(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=icu', '--encoding=SQL_ASCII',
+ "$tempdir/dataX"
+ ],
+ qr/error: encoding "SQL_ASCII" is not supported with ICU provider/,
+ 'encoding "SQL_ASCII" is not supported with ICU provider');
}
else
{
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index e91c1d013d08d8bd1e3a92f2aba958c5c7713ca6..56c578d3d81b8e61b687a8b508b3b741d4df270a 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -50,6 +50,15 @@ if ($ENV{with_icu} eq 'yes')
],
'fails for invalid ICU locale');
+ $node->command_fails_like(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=icu',
+ '--encoding=SQL_ASCII', 'foobarX'
+ ],
+ qr/ERROR: encoding "SQL_ASCII" is not supported with ICU provider/,
+ 'encoding "SQL_ASCII" is not supported with ICU provider');
+
# additional node, which uses the icu provider
my $node2 = PostgreSQL::Test::Cluster->new('icu');
$node2->init(extra => ['--locale-provider=icu', '--icu-locale=en']);