I wrote: > Andres Freund <and...@anarazel.de> writes: >> On 2018-12-14 14:25:30 -0500, Tom Lane wrote: >>> Now, it's certainly true that nameeq() doesn't need a collation spec >>> today, any more than texteq() does, because both types legislate that >>> equality is bitwise. But if we leave ExecBuildGroupingEqual like this, >>> we're mandating that no type anywhere, ever, can have a >>> collation-dependent notion of equality. Is that really a restriction >>> we're comfortable with? citext is sort of the poster child here, >>> because it already wishes it could have collation-dependent equality.
>> Don't we rely on that in other places too? > Possibly; the regression tests probably don't stress type "name" too hard, > so my Assert might well not be finding some other code paths that do > likewise. To investigate this a bit more, I added a similar Assert in texteq(), and also grepped for places that were using FunctionCall2 to call equality functions. The attached patch shows what I had to change to get check-world to pass. It's possible that there are one or two more places I missed --- two of these changes were found by grepping but were *not* exposed by regression testing. But this ought to be a pretty good indication of the scope of the problem. There are a couple of places touched here that know they are invoking texteq specifically, so we wouldn't really need to change them to have a working feature. In all I found six places that we'd need to deal with if we want to support collation-dependent equality. A possible medium-term compromise is to pass DEFAULT_COLLATION_OID, as I've done here, so that at least a collation-examining equality function won't fall over. A variant of that is to pass C_COLLATION_OID, which presumably would be faster to execute in many cases; it would also have the advantage of being database-independent, which might be a good thing in execReplication.c for instance. Or we could just decide that collation-dependent equality is a bridge too far for today. I'm not hugely excited about pursuing it myself, I just wanted to get a handle on how badly broken it is. regards, tom lane
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index d9087ca..fa00a36 100644 *** a/src/backend/executor/execExpr.c --- b/src/backend/executor/execExpr.c *************** *** 32,37 **** --- 32,38 ---- #include "access/nbtree.h" #include "catalog/objectaccess.h" + #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "executor/execExpr.h" #include "executor/nodeSubplan.h" *************** ExecBuildGroupingEqual(TupleDesc ldesc, *** 3393,3399 **** fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, ! InvalidOid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; --- 3394,3400 ---- fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, ! DEFAULT_COLLATION_OID, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 5bd3bbc..cd40071 100644 *** a/src/backend/executor/execReplication.c --- b/src/backend/executor/execReplication.c *************** *** 17,22 **** --- 17,23 ---- #include "access/relscan.h" #include "access/transam.h" #include "access/xact.h" + #include "catalog/pg_collation.h" #include "commands/trigger.h" #include "executor/executor.h" #include "nodes/nodeFuncs.h" *************** tuple_equals_slot(TupleDesc desc, HeapTu *** 265,273 **** errmsg("could not identify an equality operator for type %s", format_type_be(att->atttypid)))); ! if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, ! values[attrnum], ! slot->tts_values[attrnum]))) return false; } --- 266,275 ---- errmsg("could not identify an equality operator for type %s", format_type_be(att->atttypid)))); ! if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo, ! DEFAULT_COLLATION_OID, ! values[attrnum], ! slot->tts_values[attrnum]))) return false; } diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index daf56cd..50e0f58 100644 *** a/src/backend/executor/nodeAgg.c --- b/src/backend/executor/nodeAgg.c *************** *** 219,224 **** --- 219,225 ---- #include "access/htup_details.h" #include "catalog/objectaccess.h" #include "catalog/pg_aggregate.h" + #include "catalog/pg_collation.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "executor/executor.h" *************** process_ordered_aggregate_single(AggStat *** 748,762 **** /* * If DISTINCT mode, and not distinct from prior, skip it. * ! * Note: we assume equality functions don't care about collation. */ if (isDistinct && haveOldVal && ((oldIsNull && *isNull) || (!oldIsNull && !*isNull && oldAbbrevVal == newAbbrevVal && ! DatumGetBool(FunctionCall2(&pertrans->equalfnOne, ! oldVal, *newVal))))) { /* equal to prior, so forget this one */ if (!pertrans->inputtypeByVal && !*isNull) --- 749,765 ---- /* * If DISTINCT mode, and not distinct from prior, skip it. * ! * Note: most equality functions don't care about collation. For ! * those that do, just select DEFAULT_COLLATION_OID. */ if (isDistinct && haveOldVal && ((oldIsNull && *isNull) || (!oldIsNull && !*isNull && oldAbbrevVal == newAbbrevVal && ! DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne, ! DEFAULT_COLLATION_OID, ! oldVal, *newVal))))) { /* equal to prior, so forget this one */ if (!pertrans->inputtypeByVal && !*isNull) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 84a1a91..b136650 100644 *** a/src/backend/executor/nodeSubplan.c --- b/src/backend/executor/nodeSubplan.c *************** *** 30,35 **** --- 30,36 ---- #include <math.h> #include "access/htup_details.h" + #include "catalog/pg_collation.h" #include "executor/executor.h" #include "executor/nodeSubplan.h" #include "nodes/makefuncs.h" *************** execTuplesUnequal(TupleTableSlot *slot1, *** 671,678 **** /* Apply the type-specific equality function */ ! if (!DatumGetBool(FunctionCall2(&eqfunctions[i], ! attr1, attr2))) { result = true; /* they are unequal */ break; --- 672,680 ---- /* Apply the type-specific equality function */ ! if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i], ! DEFAULT_COLLATION_OID, ! attr1, attr2))) { result = true; /* they are unequal */ break; diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 1b21da8..5fed920 100644 *** a/src/backend/utils/adt/orderedsetaggs.c --- b/src/backend/utils/adt/orderedsetaggs.c *************** *** 17,22 **** --- 17,23 ---- #include <math.h> #include "catalog/pg_aggregate.h" + #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "executor/executor.h" *************** mode_final(PG_FUNCTION_ARGS) *** 1084,1090 **** last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && ! DatumGetBool(FunctionCall2(equalfn, val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) --- 1085,1092 ---- last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && ! DatumGetBool(FunctionCall2Coll(equalfn, DEFAULT_COLLATION_OID, ! val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index cdda860..a77d610 100644 *** a/src/backend/utils/adt/ri_triggers.c --- b/src/backend/utils/adt/ri_triggers.c *************** ri_AttributesEqual(Oid eq_opr, Oid typei *** 2975,2985 **** } /* ! * Apply the comparison operator. We assume it doesn't care about ! * collations. */ ! return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo, ! oldvalue, newvalue)); } /* ---------- --- 2975,2985 ---- } /* ! * Apply the comparison operator. */ ! return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, ! DEFAULT_COLLATION_OID, ! oldvalue, newvalue)); } /* ---------- diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 0fd3b15..c3c89cf 100644 *** a/src/backend/utils/adt/varlena.c --- b/src/backend/utils/adt/varlena.c *************** texteq(PG_FUNCTION_ARGS) *** 1646,1651 **** --- 1646,1653 ---- Size len1, len2; + Assert(PG_GET_COLLATION() != 0); + /* * Since we only care about equality or not-equality, we can avoid all the * expense of strcoll() here, and just do bitwise comparison. In fact, we *************** split_text(PG_FUNCTION_ARGS) *** 4281,4289 **** static bool text_isequal(text *txt1, text *txt2) { ! return DatumGetBool(DirectFunctionCall2(texteq, ! PointerGetDatum(txt1), ! PointerGetDatum(txt2))); } /* --- 4283,4292 ---- static bool text_isequal(text *txt1, text *txt2) { ! return DatumGetBool(DirectFunctionCall2Coll(texteq, ! DEFAULT_COLLATION_OID, ! PointerGetDatum(txt1), ! PointerGetDatum(txt2))); } /* diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index b31fd5a..2ac6dd6 100644 *** a/src/backend/utils/cache/catcache.c --- b/src/backend/utils/cache/catcache.c *************** *** 22,27 **** --- 22,28 ---- #include "access/tuptoaster.h" #include "access/valid.h" #include "access/xact.h" + #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "miscadmin.h" *************** int4hashfast(Datum datum) *** 181,187 **** static bool texteqfast(Datum a, Datum b) { ! return DatumGetBool(DirectFunctionCall2(texteq, a, b)); } static uint32 --- 182,188 ---- static bool texteqfast(Datum a, Datum b) { ! return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b)); } static uint32 *************** CatalogCacheInitializeCache(CatCache *ca *** 1014,1021 **** /* Fill in sk_strategy as well --- always standard equality */ cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber; cache->cc_skey[i].sk_subtype = InvalidOid; ! /* Currently, there are no catcaches on collation-aware data types */ ! cache->cc_skey[i].sk_collation = InvalidOid; CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p", cache->cc_relname, --- 1015,1022 ---- /* Fill in sk_strategy as well --- always standard equality */ cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber; cache->cc_skey[i].sk_subtype = InvalidOid; ! /* If a catcache key requires a collation, it must be C collation */ ! cache->cc_skey[i].sk_collation = C_COLLATION_OID; CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p", cache->cc_relname,