I wrote:
> Andres Freund <and...@anarazel.de> writes:
>> On 2018-12-14 14:25:30 -0500, Tom Lane wrote:
>>> Now, it's certainly true that nameeq() doesn't need a collation spec
>>> today, any more than texteq() does, because both types legislate that
>>> equality is bitwise.  But if we leave ExecBuildGroupingEqual like this,
>>> we're mandating that no type anywhere, ever, can have a
>>> collation-dependent notion of equality.  Is that really a restriction
>>> we're comfortable with?  citext is sort of the poster child here,
>>> because it already wishes it could have collation-dependent equality.

>> Don't we rely on that in other places too?

> Possibly; the regression tests probably don't stress type "name" too hard,
> so my Assert might well not be finding some other code paths that do
> likewise.

To investigate this a bit more, I added a similar Assert in texteq(),
and also grepped for places that were using FunctionCall2 to call
equality functions.  The attached patch shows what I had to change
to get check-world to pass.  It's possible that there are one or two
more places I missed --- two of these changes were found by grepping
but were *not* exposed by regression testing.  But this ought to be
a pretty good indication of the scope of the problem.

There are a couple of places touched here that know they are invoking
texteq specifically, so we wouldn't really need to change them to have
a working feature.  In all I found six places that we'd need to deal with
if we want to support collation-dependent equality.

A possible medium-term compromise is to pass DEFAULT_COLLATION_OID,
as I've done here, so that at least a collation-examining equality
function won't fall over.  A variant of that is to pass C_COLLATION_OID,
which presumably would be faster to execute in many cases; it would
also have the advantage of being database-independent, which might
be a good thing in execReplication.c for instance.

Or we could just decide that collation-dependent equality is a bridge
too far for today.  I'm not hugely excited about pursuing it myself,
I just wanted to get a handle on how badly broken it is.

                        regards, tom lane

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index d9087ca..fa00a36 100644
*** a/src/backend/executor/execExpr.c
--- b/src/backend/executor/execExpr.c
***************
*** 32,37 ****
--- 32,38 ----
  
  #include "access/nbtree.h"
  #include "catalog/objectaccess.h"
+ #include "catalog/pg_collation.h"
  #include "catalog/pg_type.h"
  #include "executor/execExpr.h"
  #include "executor/nodeSubplan.h"
*************** ExecBuildGroupingEqual(TupleDesc ldesc, 
*** 3393,3399 ****
  		fmgr_info(foid, finfo);
  		fmgr_info_set_expr(NULL, finfo);
  		InitFunctionCallInfoData(*fcinfo, finfo, 2,
! 								 InvalidOid, NULL, NULL);
  
  		/* left arg */
  		scratch.opcode = EEOP_INNER_VAR;
--- 3394,3400 ----
  		fmgr_info(foid, finfo);
  		fmgr_info_set_expr(NULL, finfo);
  		InitFunctionCallInfoData(*fcinfo, finfo, 2,
! 								 DEFAULT_COLLATION_OID, NULL, NULL);
  
  		/* left arg */
  		scratch.opcode = EEOP_INNER_VAR;
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 5bd3bbc..cd40071 100644
*** a/src/backend/executor/execReplication.c
--- b/src/backend/executor/execReplication.c
***************
*** 17,22 ****
--- 17,23 ----
  #include "access/relscan.h"
  #include "access/transam.h"
  #include "access/xact.h"
+ #include "catalog/pg_collation.h"
  #include "commands/trigger.h"
  #include "executor/executor.h"
  #include "nodes/nodeFuncs.h"
*************** tuple_equals_slot(TupleDesc desc, HeapTu
*** 265,273 ****
  					 errmsg("could not identify an equality operator for type %s",
  							format_type_be(att->atttypid))));
  
! 		if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
! 										values[attrnum],
! 										slot->tts_values[attrnum])))
  			return false;
  	}
  
--- 266,275 ----
  					 errmsg("could not identify an equality operator for type %s",
  							format_type_be(att->atttypid))));
  
! 		if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
! 											DEFAULT_COLLATION_OID,
! 											values[attrnum],
! 											slot->tts_values[attrnum])))
  			return false;
  	}
  
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index daf56cd..50e0f58 100644
*** a/src/backend/executor/nodeAgg.c
--- b/src/backend/executor/nodeAgg.c
***************
*** 219,224 ****
--- 219,225 ----
  #include "access/htup_details.h"
  #include "catalog/objectaccess.h"
  #include "catalog/pg_aggregate.h"
+ #include "catalog/pg_collation.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
*************** process_ordered_aggregate_single(AggStat
*** 748,762 ****
  		/*
  		 * If DISTINCT mode, and not distinct from prior, skip it.
  		 *
! 		 * Note: we assume equality functions don't care about collation.
  		 */
  		if (isDistinct &&
  			haveOldVal &&
  			((oldIsNull && *isNull) ||
  			 (!oldIsNull && !*isNull &&
  			  oldAbbrevVal == newAbbrevVal &&
! 			  DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
! 										 oldVal, *newVal)))))
  		{
  			/* equal to prior, so forget this one */
  			if (!pertrans->inputtypeByVal && !*isNull)
--- 749,765 ----
  		/*
  		 * If DISTINCT mode, and not distinct from prior, skip it.
  		 *
! 		 * Note: most equality functions don't care about collation.  For
! 		 * those that do, just select DEFAULT_COLLATION_OID.
  		 */
  		if (isDistinct &&
  			haveOldVal &&
  			((oldIsNull && *isNull) ||
  			 (!oldIsNull && !*isNull &&
  			  oldAbbrevVal == newAbbrevVal &&
! 			  DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
! 											 DEFAULT_COLLATION_OID,
! 											 oldVal, *newVal)))))
  		{
  			/* equal to prior, so forget this one */
  			if (!pertrans->inputtypeByVal && !*isNull)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 84a1a91..b136650 100644
*** a/src/backend/executor/nodeSubplan.c
--- b/src/backend/executor/nodeSubplan.c
***************
*** 30,35 ****
--- 30,36 ----
  #include <math.h>
  
  #include "access/htup_details.h"
+ #include "catalog/pg_collation.h"
  #include "executor/executor.h"
  #include "executor/nodeSubplan.h"
  #include "nodes/makefuncs.h"
*************** execTuplesUnequal(TupleTableSlot *slot1,
*** 671,678 ****
  
  		/* Apply the type-specific equality function */
  
! 		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
! 										attr1, attr2)))
  		{
  			result = true;		/* they are unequal */
  			break;
--- 672,680 ----
  
  		/* Apply the type-specific equality function */
  
! 		if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
! 											DEFAULT_COLLATION_OID,
! 											attr1, attr2)))
  		{
  			result = true;		/* they are unequal */
  			break;
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 1b21da8..5fed920 100644
*** a/src/backend/utils/adt/orderedsetaggs.c
--- b/src/backend/utils/adt/orderedsetaggs.c
***************
*** 17,22 ****
--- 17,23 ----
  #include <math.h>
  
  #include "catalog/pg_aggregate.h"
+ #include "catalog/pg_collation.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
*************** mode_final(PG_FUNCTION_ARGS)
*** 1084,1090 ****
  			last_abbrev_val = abbrev_val;
  		}
  		else if (abbrev_val == last_abbrev_val &&
! 				 DatumGetBool(FunctionCall2(equalfn, val, last_val)))
  		{
  			/* value equal to previous value, count it */
  			if (last_val_is_mode)
--- 1085,1092 ----
  			last_abbrev_val = abbrev_val;
  		}
  		else if (abbrev_val == last_abbrev_val &&
! 				 DatumGetBool(FunctionCall2Coll(equalfn, DEFAULT_COLLATION_OID,
! 												val, last_val)))
  		{
  			/* value equal to previous value, count it */
  			if (last_val_is_mode)
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index cdda860..a77d610 100644
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
*************** ri_AttributesEqual(Oid eq_opr, Oid typei
*** 2975,2985 ****
  	}
  
  	/*
! 	 * Apply the comparison operator.  We assume it doesn't care about
! 	 * collations.
  	 */
! 	return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo,
! 									  oldvalue, newvalue));
  }
  
  /* ----------
--- 2975,2985 ----
  	}
  
  	/*
! 	 * Apply the comparison operator.
  	 */
! 	return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo,
! 										  DEFAULT_COLLATION_OID,
! 										  oldvalue, newvalue));
  }
  
  /* ----------
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0fd3b15..c3c89cf 100644
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
*************** texteq(PG_FUNCTION_ARGS)
*** 1646,1651 ****
--- 1646,1653 ----
  	Size		len1,
  				len2;
  
+ 	Assert(PG_GET_COLLATION() != 0);
+ 
  	/*
  	 * Since we only care about equality or not-equality, we can avoid all the
  	 * expense of strcoll() here, and just do bitwise comparison.  In fact, we
*************** split_text(PG_FUNCTION_ARGS)
*** 4281,4289 ****
  static bool
  text_isequal(text *txt1, text *txt2)
  {
! 	return DatumGetBool(DirectFunctionCall2(texteq,
! 											PointerGetDatum(txt1),
! 											PointerGetDatum(txt2)));
  }
  
  /*
--- 4283,4292 ----
  static bool
  text_isequal(text *txt1, text *txt2)
  {
! 	return DatumGetBool(DirectFunctionCall2Coll(texteq,
! 												DEFAULT_COLLATION_OID,
! 												PointerGetDatum(txt1),
! 												PointerGetDatum(txt2)));
  }
  
  /*
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index b31fd5a..2ac6dd6 100644
*** a/src/backend/utils/cache/catcache.c
--- b/src/backend/utils/cache/catcache.c
***************
*** 22,27 ****
--- 22,28 ----
  #include "access/tuptoaster.h"
  #include "access/valid.h"
  #include "access/xact.h"
+ #include "catalog/pg_collation.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
*************** int4hashfast(Datum datum)
*** 181,187 ****
  static bool
  texteqfast(Datum a, Datum b)
  {
! 	return DatumGetBool(DirectFunctionCall2(texteq, a, b));
  }
  
  static uint32
--- 182,188 ----
  static bool
  texteqfast(Datum a, Datum b)
  {
! 	return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b));
  }
  
  static uint32
*************** CatalogCacheInitializeCache(CatCache *ca
*** 1014,1021 ****
  		/* Fill in sk_strategy as well --- always standard equality */
  		cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
  		cache->cc_skey[i].sk_subtype = InvalidOid;
! 		/* Currently, there are no catcaches on collation-aware data types */
! 		cache->cc_skey[i].sk_collation = InvalidOid;
  
  		CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
  					cache->cc_relname,
--- 1015,1022 ----
  		/* Fill in sk_strategy as well --- always standard equality */
  		cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
  		cache->cc_skey[i].sk_subtype = InvalidOid;
! 		/* If a catcache key requires a collation, it must be C collation */
! 		cache->cc_skey[i].sk_collation = C_COLLATION_OID;
  
  		CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
  					cache->cc_relname,

Reply via email to