I wrote: > While fooling with the idea of making type "name" collation > aware, it occurred to me that there's a better, more general > answer, which is to insist that collation-aware system catalog > columns must be marked with C collation. This rule would apply > without modification to both "text" and "name" columns. In the > wake of commit 5e0928005, it also means that pg_statistic data > for such a column would port safely across a database collation > change, which up to now it does not. And I think we could have > the bootstrap code apply the rule automatically, making for one > less way to screw up when changing catalog definitions.
Concretely, this ... regards, tom lane
diff --git a/src/backend/access/common/scankey.c b/src/backend/access/common/scankey.c index 781516c..5be4fe8 100644 *** a/src/backend/access/common/scankey.c --- b/src/backend/access/common/scankey.c *************** ScanKeyEntryInitialize(ScanKey entry, *** 64,72 **** * It cannot handle NULL arguments, unary operators, or nondefault operators, * but we need none of those features for most hardwired lookups. * ! * We set collation to DEFAULT_COLLATION_OID always. This is appropriate ! * for textual columns in system catalogs, and it will be ignored for ! * non-textual columns, so it's not worth trying to be more finicky. * * Note: CurrentMemoryContext at call should be as long-lived as the ScanKey * itself, because that's what will be used for any subsidiary info attached --- 64,72 ---- * It cannot handle NULL arguments, unary operators, or nondefault operators, * but we need none of those features for most hardwired lookups. * ! * We set collation to C_COLLATION_OID always. This is the correct value ! * for all collation-aware columns in system catalogs, and it will be ignored ! * for other column types, so it's not worth trying to be more finicky. * * Note: CurrentMemoryContext at call should be as long-lived as the ScanKey * itself, because that's what will be used for any subsidiary info attached *************** ScanKeyInit(ScanKey entry, *** 83,89 **** entry->sk_attno = attributeNumber; entry->sk_strategy = strategy; entry->sk_subtype = InvalidOid; ! entry->sk_collation = DEFAULT_COLLATION_OID; entry->sk_argument = argument; fmgr_info(procedure, &entry->sk_func); } --- 83,89 ---- entry->sk_attno = attributeNumber; entry->sk_strategy = strategy; entry->sk_subtype = InvalidOid; ! entry->sk_collation = C_COLLATION_OID; entry->sk_argument = argument; fmgr_info(procedure, &entry->sk_func); } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 7caab64..8e42255 100644 *** a/src/backend/bootstrap/bootstrap.c --- b/src/backend/bootstrap/bootstrap.c *************** DefineAttr(char *name, char *type, int a *** 744,749 **** --- 744,758 ---- attrtypes[attnum]->attndims = 0; } + /* + * If a system catalog column is collation-aware, force it to use C + * collation, so that its behavior is independent of the database's + * collation. This is essential to allow template0 to be cloned with a + * different database collation. + */ + if (OidIsValid(attrtypes[attnum]->attcollation)) + attrtypes[attnum]->attcollation = C_COLLATION_OID; + attrtypes[attnum]->attstattarget = -1; attrtypes[attnum]->attcacheoff = -1; attrtypes[attnum]->atttypmod = -1; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 8e2a248..115e4c6 100644 *** a/src/backend/catalog/genbki.pl --- b/src/backend/catalog/genbki.pl *************** my $GenbkiNextOid = $FirstGenbkiObjectId *** 167,172 **** --- 167,175 ---- my $BOOTSTRAP_SUPERUSERID = Catalog::FindDefinedSymbolFromData($catalog_data{pg_authid}, 'BOOTSTRAP_SUPERUSERID'); + my $C_COLLATION_OID = + Catalog::FindDefinedSymbolFromData($catalog_data{pg_collation}, + 'C_COLLATION_OID'); my $PG_CATALOG_NAMESPACE = Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace}, 'PG_CATALOG_NAMESPACE'); *************** sub morph_row_for_pgattr *** 693,699 **** # set attndims if it's an array type $row->{attndims} = $type->{typcategory} eq 'A' ? '1' : '0'; ! $row->{attcollation} = $type->{typcollation}; if (defined $attr->{forcenotnull}) { --- 696,705 ---- # set attndims if it's an array type $row->{attndims} = $type->{typcategory} eq 'A' ? '1' : '0'; ! ! # collation-aware catalog columns must use C collation ! $row->{attcollation} = $type->{typcollation} != 0 ? ! $C_COLLATION_OID : 0; if (defined $attr->{forcenotnull}) { diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index b31fd5a..fdbd6a0 100644 *** a/src/backend/utils/cache/catcache.c --- b/src/backend/utils/cache/catcache.c *************** *** 22,27 **** --- 22,28 ---- #include "access/tuptoaster.h" #include "access/valid.h" #include "access/xact.h" + #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "miscadmin.h" *************** CatalogCacheInitializeCache(CatCache *ca *** 1014,1021 **** /* Fill in sk_strategy as well --- always standard equality */ cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber; cache->cc_skey[i].sk_subtype = InvalidOid; ! /* Currently, there are no catcaches on collation-aware data types */ ! cache->cc_skey[i].sk_collation = InvalidOid; CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p", cache->cc_relname, --- 1015,1022 ---- /* Fill in sk_strategy as well --- always standard equality */ cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber; cache->cc_skey[i].sk_subtype = InvalidOid; ! /* If a catcache key requires a collation, it must be C collation */ ! cache->cc_skey[i].sk_collation = C_COLLATION_OID; CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p", cache->cc_relname, diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 2359b4c..f0c52d9 100644 *** a/src/include/catalog/indexing.h --- b/src/include/catalog/indexing.h *************** DECLARE_UNIQUE_INDEX(pg_default_acl_oid_ *** 310,319 **** DECLARE_UNIQUE_INDEX(pg_db_role_setting_databaseid_rol_index, 2965, on pg_db_role_setting using btree(setdatabase oid_ops, setrole oid_ops)); #define DbRoleSettingDatidRolidIndexId 2965 ! DECLARE_UNIQUE_INDEX(pg_seclabel_object_index, 3597, on pg_seclabel using btree(objoid oid_ops, classoid oid_ops, objsubid int4_ops, provider text_pattern_ops)); #define SecLabelObjectIndexId 3597 ! DECLARE_UNIQUE_INDEX(pg_shseclabel_object_index, 3593, on pg_shseclabel using btree(objoid oid_ops, classoid oid_ops, provider text_pattern_ops)); #define SharedSecLabelObjectIndexId 3593 DECLARE_UNIQUE_INDEX(pg_extension_oid_index, 3080, on pg_extension using btree(oid oid_ops)); --- 310,319 ---- DECLARE_UNIQUE_INDEX(pg_db_role_setting_databaseid_rol_index, 2965, on pg_db_role_setting using btree(setdatabase oid_ops, setrole oid_ops)); #define DbRoleSettingDatidRolidIndexId 2965 ! DECLARE_UNIQUE_INDEX(pg_seclabel_object_index, 3597, on pg_seclabel using btree(objoid oid_ops, classoid oid_ops, objsubid int4_ops, provider text_ops)); #define SecLabelObjectIndexId 3597 ! DECLARE_UNIQUE_INDEX(pg_shseclabel_object_index, 3593, on pg_shseclabel using btree(objoid oid_ops, classoid oid_ops, provider text_ops)); #define SharedSecLabelObjectIndexId 3593 DECLARE_UNIQUE_INDEX(pg_extension_oid_index, 3080, on pg_extension using btree(oid oid_ops)); *************** DECLARE_UNIQUE_INDEX(pg_policy_polrelid_ *** 333,339 **** DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replication_origin using btree(roident oid_ops)); #define ReplicationOriginIdentIndex 6001 ! DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops)); #define ReplicationOriginNameIndex 6002 DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops)); --- 333,339 ---- DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replication_origin using btree(roident oid_ops)); #define ReplicationOriginIdentIndex 6001 ! DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_ops)); #define ReplicationOriginNameIndex 6002 DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops)); diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 6072f6b..52cd7b9 100644 *** a/src/test/regress/expected/opr_sanity.out --- b/src/test/regress/expected/opr_sanity.out *************** ORDER BY 1; *** 2060,2077 **** -- a representational error in pg_index, but simply wrong catalog design. -- It's bad because we expect to be able to clone template0 and assign the -- copy a different database collation. It would especially not work for ! -- shared catalogs. Note that although text columns will show a collation ! -- in indcollation, they're still okay to index with text_pattern_ops, ! -- so allow that case. SELECT indexrelid::regclass, indrelid::regclass, iclass, icoll FROM (SELECT indexrelid, indrelid, unnest(indclass) as iclass, unnest(indcollation) as icoll FROM pg_index WHERE indrelid < 16384) ss ! WHERE icoll != 0 AND iclass != ! (SELECT oid FROM pg_opclass ! WHERE opcname = 'text_pattern_ops' AND opcmethod = ! (SELECT oid FROM pg_am WHERE amname = 'btree')); indexrelid | indrelid | iclass | icoll ------------+----------+--------+------- (0 rows) --- 2060,2073 ---- -- a representational error in pg_index, but simply wrong catalog design. -- It's bad because we expect to be able to clone template0 and assign the -- copy a different database collation. It would especially not work for ! -- shared catalogs. Collation-sensitive indexes should have "C" collation. SELECT indexrelid::regclass, indrelid::regclass, iclass, icoll FROM (SELECT indexrelid, indrelid, unnest(indclass) as iclass, unnest(indcollation) as icoll FROM pg_index WHERE indrelid < 16384) ss ! WHERE icoll != 0 AND ! icoll != (SELECT oid FROM pg_collation WHERE collname = 'C'); indexrelid | indrelid | iclass | icoll ------------+----------+--------+------- (0 rows) diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 91c68f4..5cf4df1 100644 *** a/src/test/regress/sql/opr_sanity.sql --- b/src/test/regress/sql/opr_sanity.sql *************** ORDER BY 1; *** 1333,1348 **** -- a representational error in pg_index, but simply wrong catalog design. -- It's bad because we expect to be able to clone template0 and assign the -- copy a different database collation. It would especially not work for ! -- shared catalogs. Note that although text columns will show a collation ! -- in indcollation, they're still okay to index with text_pattern_ops, ! -- so allow that case. SELECT indexrelid::regclass, indrelid::regclass, iclass, icoll FROM (SELECT indexrelid, indrelid, unnest(indclass) as iclass, unnest(indcollation) as icoll FROM pg_index WHERE indrelid < 16384) ss ! WHERE icoll != 0 AND iclass != ! (SELECT oid FROM pg_opclass ! WHERE opcname = 'text_pattern_ops' AND opcmethod = ! (SELECT oid FROM pg_am WHERE amname = 'btree')); --- 1333,1344 ---- -- a representational error in pg_index, but simply wrong catalog design. -- It's bad because we expect to be able to clone template0 and assign the -- copy a different database collation. It would especially not work for ! -- shared catalogs. Collation-sensitive indexes should have "C" collation. SELECT indexrelid::regclass, indrelid::regclass, iclass, icoll FROM (SELECT indexrelid, indrelid, unnest(indclass) as iclass, unnest(indcollation) as icoll FROM pg_index WHERE indrelid < 16384) ss ! WHERE icoll != 0 AND ! icoll != (SELECT oid FROM pg_collation WHERE collname = 'C');