On Tue, Sep 3, 2024 at 5:41 PM Peter Eisentraut <pe...@eisentraut.org> wrote: > > > I like this patch version (v4). It's the simplest, probably also > easiest to backpatch. >
I am actually confused. In this email thread [1], I listed 3 corn cases. I thought all these 3 corner cases should not be allowed. but V4 didn't solve these corner case issues. what do you think of their corner case, should it be allowed? Anyway, I thought these corner cases should not be allowed to happen, so I made sure PK, FK ties related collation were deterministic. PK can have indeterministic collation as long as it does not interact with FK. [1] https://www.postgresql.org/message-id/CACJufxEW6OMBqt8cbr%3D3Jt%2B%2BZd_SL-4YDjfk7Q7DhGKiSLcu4g%40mail.gmail.com
From 024d818e0e286af10b74649bd3156836d6f28447 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Wed, 4 Sep 2024 14:31:43 +0800 Subject: [PATCH v5 1/1] ensure PK-FK ties related collation indeterministic PK can have indeterministic collation as long as it not interact with FK. To make the PK-FK tie consistent, we need to ensure both side's collation is deterministic, otherwise, the following two some anomalitys can happen. 1. PK side collation is indeterministic, multiple form FK values equal to one PK value. 2. FK side collation is indeterministic, multiple form PK values equal to one FK value. discussion: https://postgr.es/m/78d824e0-b21e-480d-a252-e4b84bc2c24b%40illuminatedcomputing.com --- src/backend/commands/tablecmds.c | 65 ++++++++ .../regress/expected/collate.icu.utf8.out | 142 +++++++----------- src/test/regress/sql/collate.icu.utf8.sql | 60 ++++---- 3 files changed, 151 insertions(+), 116 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b3cc6f8f69..6c951ae36e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -9705,6 +9705,71 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, (errcode(ERRCODE_INVALID_FOREIGN_KEY), errmsg("number of referencing and referenced columns for foreign key disagree"))); + for (i = 0; i < numpks; i++) + { + int32 keycoltypmod; + Oid keycoltype; + Oid keycolcollation; + Oid typcollation; + bool col_coll_deterministic = true; + + /* + * for both foreign key and primary key, we need make sure the specified + * keys collation (via COLLATE clause) is deterministic. we also need to + * ensure the key's typcollation is indeterministic (for domain type). + * + * PK and FK both are not indeterministic, then anomality can happen: + * 1. PK side collation is indeterministic, multiple form FK values + * equal to one PK value. + * 2. FK side collation is indeterministic, multiple form PK values + * equal to one FK value. + * + */ + + /* checking PK collation deterministic */ + get_atttypetypmodcoll(RelationGetRelid(pkrel), pkattnum[i], + &keycoltype, &keycoltypmod, + &keycolcollation); + + typcollation = get_typcollation(keycoltype); + if (OidIsValid(typcollation)) + col_coll_deterministic = get_collation_isdeterministic(typcollation); + + if (col_coll_deterministic && OidIsValid(keycolcollation)) + col_coll_deterministic = get_collation_isdeterministic(keycolcollation); + + if (!col_coll_deterministic) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("foreign key constraint \"%s\" cannot be implemented", + fkconstraint->conname), + errdetail("Cannot have indeterministic collations in primary key for referenced table \"%s\".", + RelationGetRelationName(pkrel)), + errhint("You may need to make primary key constraint's collation deterministic."))); + + + /* checking PK collation deterministic */ + get_atttypetypmodcoll(RelationGetRelid(rel), fkattnum[i], + &keycoltype, &keycoltypmod, + &keycolcollation); + + typcollation = get_typcollation(keycoltype); + if (OidIsValid(typcollation)) + col_coll_deterministic = get_collation_isdeterministic(typcollation); + + if (col_coll_deterministic && OidIsValid(keycolcollation)) + col_coll_deterministic = get_collation_isdeterministic(keycolcollation); + + if (!col_coll_deterministic) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("foreign key constraint \"%s\" cannot be implemented", + fkconstraint->conname), + errdetail("Foreign key constraint cannot use indeterministic collation for referencing table \"%s\".", + RelationGetRelationName(rel)), + errhint("You may need to make the foreign key constraint's collation deterministic."))); + } + /* * On the strength of a previous constraint, we might avoid scanning * tables to validate this one. See below. diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 31345295c1..5074812099 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1841,100 +1841,62 @@ SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; (1 row) -- foreign keys (should use collation of primary key) --- PK is case-sensitive, FK is case-insensitive +CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1'); +CREATE DOMAIN text_d_ignore_accent_case AS text COLLATE ignore_accent_case; +-- PK is case-sensitive (deterministic), FK is case-insensitive (indeterministic). +-- not OK, multiple PK-side value equal to one FK-side value not allowed +-- e.g. not allowed case: PK values {'a', 'A"} both equal to FK value {'A'} CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); -INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -INSERT INTO test10fk VALUES ('abc'); -- ok -INSERT INTO test10fk VALUES ('ABC'); -- error -ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" -DETAIL: Key (x)=(ABC) is not present in table "test10pk". -INSERT INTO test10fk VALUES ('xyz'); -- error -ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" -DETAIL: Key (x)=(xyz) is not present in table "test10pk". -SELECT * FROM test10pk; - x ------ - abc - def - ghi -(3 rows) - -SELECT * FROM test10fk; - x ------ - abc -(1 row) - --- restrict update even though the values are "equal" in the FK table -UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error -ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" -DETAIL: Key (x)=(ABC) is not present in table "test10pk". -SELECT * FROM test10fk; - x ------ - abc -(1 row) - -DELETE FROM test10pk WHERE x = 'abc'; -SELECT * FROM test10pk; - x ------ - def - ghi -(2 rows) - -SELECT * FROM test10fk; - x ---- -(0 rows) - --- PK is case-insensitive, FK is case-sensitive +ERROR: foreign key constraint "test10fk_x_fkey" cannot be implemented +DETAIL: Foreign key constraint cannot use indeterministic collation for referencing table "test10fk". +HINT: You may need to make the foreign key constraint's collation deterministic. +-- PK is case-insensitive (indeterministic), FK is case-sensitive (deterministic) +-- more than one referencing (FK) row value equal to referenced (PK) row should not allowed +-- e.g. not allowed case: FK values {'a', 'A"} both equal to PK value {'A'} CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); -INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -INSERT INTO test11fk VALUES ('abc'); -- ok -INSERT INTO test11fk VALUES ('ABC'); -- ok -INSERT INTO test11fk VALUES ('xyz'); -- error -ERROR: insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey" -DETAIL: Key (x)=(xyz) is not present in table "test11pk". -SELECT * FROM test11pk; - x ------ - abc - def - ghi -(3 rows) - -SELECT * FROM test11fk; - x ------ - abc - ABC -(2 rows) - --- cascade update even though the values are "equal" in the PK table -UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; -SELECT * FROM test11fk; - x ------ - ABC - ABC -(2 rows) - -DELETE FROM test11pk WHERE x = 'abc'; -SELECT * FROM test11pk; - x ------ - def - ghi -(2 rows) - -SELECT * FROM test11fk; - x ---- -(0 rows) - +ERROR: foreign key constraint "test11fk_x_fkey" cannot be implemented +DETAIL: Cannot have indeterministic collations in primary key for referenced table "test11pk". +HINT: You may need to make primary key constraint's collation deterministic. +drop table test11pk; +CREATE TABLE test12pk (x text PRIMARY KEY); +CREATE TABLE test12fk (x text REFERENCES test12pk on update cascade on delete cascade); +--alter table change primary key's collation to indeterministic, should fail. +alter table test12pk alter column x set data type text COLLATE ignore_accent_case; +ERROR: foreign key constraint "test12fk_x_fkey" cannot be implemented +DETAIL: Cannot have indeterministic collations in primary key for referenced table "test12pk". +HINT: You may need to make primary key constraint's collation deterministic. +alter table test12pk alter column x set data type text_d_ignore_accent_case; +ERROR: foreign key constraint "test12fk_x_fkey" cannot be implemented +DETAIL: Cannot have indeterministic collations in primary key for referenced table "test12pk". +HINT: You may need to make primary key constraint's collation deterministic. +---should also fail. +alter table test12pk alter column x set data type text_d_ignore_accent_case COLLATE "C"; +ERROR: foreign key constraint "test12fk_x_fkey" cannot be implemented +DETAIL: Cannot have indeterministic collations in primary key for referenced table "test12pk". +HINT: You may need to make primary key constraint's collation deterministic. +drop table test12pk,test12fk; +--alter table change foreign key's collation, all these case should fail. +CREATE TABLE test13pk (x text PRIMARY KEY); +CREATE TABLE test13fk (x text REFERENCES test13pk on update cascade on delete cascade); +alter table test13fk alter column x set data type text COLLATE ignore_accent_case; +ERROR: foreign key constraint "test13fk_x_fkey" cannot be implemented +DETAIL: Foreign key constraint cannot use indeterministic collation for referencing table "test13fk". +HINT: You may need to make the foreign key constraint's collation deterministic. +alter table test13fk alter column x set data type text_d_ignore_accent_case; +ERROR: foreign key constraint "test13fk_x_fkey" cannot be implemented +DETAIL: Foreign key constraint cannot use indeterministic collation for referencing table "test13fk". +HINT: You may need to make the foreign key constraint's collation deterministic. +alter table test13fk alter column x set data type text_d_ignore_accent_case COLLATE ignore_accent_case; +ERROR: foreign key constraint "test13fk_x_fkey" cannot be implemented +DETAIL: Foreign key constraint cannot use indeterministic collation for referencing table "test13fk". +HINT: You may need to make the foreign key constraint's collation deterministic. +alter table test13fk alter column x set data type text_d_ignore_accent_case COLLATE "C"; +ERROR: foreign key constraint "test13fk_x_fkey" cannot be implemented +DETAIL: Foreign key constraint cannot use indeterministic collation for referencing table "test13fk". +HINT: You may need to make the foreign key constraint's collation deterministic. +drop table test13pk,test13fk; -- partitioning CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 80f28a97d7..ad418e87eb 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -688,38 +688,46 @@ SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sens SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; -- foreign keys (should use collation of primary key) +CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1'); +CREATE DOMAIN text_d_ignore_accent_case AS text COLLATE ignore_accent_case; --- PK is case-sensitive, FK is case-insensitive +-- PK is case-sensitive (deterministic), FK is case-insensitive (indeterministic). +-- not OK, multiple PK-side value equal to one FK-side value not allowed +-- e.g. not allowed case: PK values {'a', 'A"} both equal to FK value {'A'} CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); -INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -INSERT INTO test10fk VALUES ('abc'); -- ok -INSERT INTO test10fk VALUES ('ABC'); -- error -INSERT INTO test10fk VALUES ('xyz'); -- error -SELECT * FROM test10pk; -SELECT * FROM test10fk; --- restrict update even though the values are "equal" in the FK table -UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error -SELECT * FROM test10fk; -DELETE FROM test10pk WHERE x = 'abc'; -SELECT * FROM test10pk; -SELECT * FROM test10fk; --- PK is case-insensitive, FK is case-sensitive + +-- PK is case-insensitive (indeterministic), FK is case-sensitive (deterministic) +-- more than one referencing (FK) row value equal to referenced (PK) row should not allowed +-- e.g. not allowed case: FK values {'a', 'A"} both equal to PK value {'A'} CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); -INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); -INSERT INTO test11fk VALUES ('abc'); -- ok -INSERT INTO test11fk VALUES ('ABC'); -- ok -INSERT INTO test11fk VALUES ('xyz'); -- error -SELECT * FROM test11pk; -SELECT * FROM test11fk; --- cascade update even though the values are "equal" in the PK table -UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; -SELECT * FROM test11fk; -DELETE FROM test11pk WHERE x = 'abc'; -SELECT * FROM test11pk; -SELECT * FROM test11fk; +drop table test11pk; + + + +CREATE TABLE test12pk (x text PRIMARY KEY); +CREATE TABLE test12fk (x text REFERENCES test12pk on update cascade on delete cascade); + +--alter table change primary key's collation to indeterministic, should fail. +alter table test12pk alter column x set data type text COLLATE ignore_accent_case; +alter table test12pk alter column x set data type text_d_ignore_accent_case; + +---should also fail. +alter table test12pk alter column x set data type text_d_ignore_accent_case COLLATE "C"; +drop table test12pk,test12fk; + + + +--alter table change foreign key's collation, all these case should fail. +CREATE TABLE test13pk (x text PRIMARY KEY); +CREATE TABLE test13fk (x text REFERENCES test13pk on update cascade on delete cascade); +alter table test13fk alter column x set data type text COLLATE ignore_accent_case; +alter table test13fk alter column x set data type text_d_ignore_accent_case; +alter table test13fk alter column x set data type text_d_ignore_accent_case COLLATE ignore_accent_case; +alter table test13fk alter column x set data type text_d_ignore_accent_case COLLATE "C"; +drop table test13pk,test13fk; -- partitioning CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); base-commit: 31a98934d1699007b50aefaedd68ab4d6b42e3b4 -- 2.34.1