Re: Collatability of type "name"

Tom Lane Tue, 18 Dec 2018 13:55:36 -0800

Robert Haas <robertmh...@gmail.com> writes:
> On Mon, Dec 10, 2018 at 2:50 AM Tom Lane <t...@sss.pgh.pa.us> wrote:
>> Also, I think that either solution would lead to some subtle changes
>> in semantics.  For example, right now if you compare a name column
>> to a text value, you get a text (collation-aware) comparison using
>> the database's default collation.  It looks like if name columns
>> are marked with attcollation = 'C', that would win and the comparison
>> would now have 'C' collation unless you explicitly override it with
>> a COLLATE clause.  I'm not sure this is a bad thing --- it'd be more
>> likely to match the sort order of the index on the column --- but it
>> could surprise people.


> It's not great to change the semantics of stuff like this, but it
> doesn't sound all that bad.

I had an epiphany after committing 6b0faf723: if we're forcing system
catalog columns to have "C" collation, there's no critical need for
type "name" to do that for itself.  We could upgrade "name" to be
collatable with typcollation = DEFAULT_COLLATION_OID, and then its
comparison semantics would be *exactly the same as text*.  Only the
physical representation is different.

This should mean that it's semantically trivial to unify the name_ops
opfamily with text_ops (not text_pattern_ops, as I'd previously supposed)
and add all the requisite cross-type operators.  I haven't actually
done that yet, but I have made a patch to make "name" fully
collation-aware, as attached.

This approach does have some minuses, though:

* There are assorted user-defined "name" columns in the regression
tests, which may introduce locale dependencies that weren't there
before.  I found a couple by running check-world under various locales,
and patched those in the attached, but it's definitely possible that
there are more issues in locales I didn't try.

* If any end users are using columns of type "name", they'd likewise
see behavioral changes, plus their indexes would be broken.  We
discourage people from using that type, so I don't think this is a
deal-breaker, but we'd at least have to add intelligence to pg_upgrade
to make it notice user-defined indexes on name columns and arrange
to reindex them.

We could eliminate those two problems if we made "name" have
typcollation "C" rather than "default", so that its semantics
wouldn't change without explicit collation specs.  This feels
like pretty much of a wart to me, but maybe it's worth doing
in the name of avoiding compatibility issues.  We could still
unify name_ops with text_ops, but now "name" would act more like
a domain with an explicit collation spec.

Thoughts?

                        regards, tom lane

diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 6f2ad23..94ef6ea 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -333,30 +333,3 @@ btcharcmp(PG_FUNCTION_ARGS)
 	/* Be careful to compare chars as unsigned */
 	PG_RETURN_INT32((int32) ((uint8) a) - (int32) ((uint8) b));
 }
-
-Datum
-btnamecmp(PG_FUNCTION_ARGS)
-{
-	Name		a = PG_GETARG_NAME(0);
-	Name		b = PG_GETARG_NAME(1);
-
-	PG_RETURN_INT32(strncmp(NameStr(*a), NameStr(*b), NAMEDATALEN));
-}
-
-static int
-btnamefastcmp(Datum x, Datum y, SortSupport ssup)
-{
-	Name		a = DatumGetName(x);
-	Name		b = DatumGetName(y);
-
-	return strncmp(NameStr(*a), NameStr(*b), NAMEDATALEN);
-}
-
-Datum
-btnamesortsupport(PG_FUNCTION_ARGS)
-{
-	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-
-	ssup->comparator = btnamefastcmp;
-	PG_RETURN_VOID();
-}
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 8e42255..e547100 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -111,7 +111,7 @@ static const struct typinfo TypInfo[] = {
 	F_INT4IN, F_INT4OUT},
 	{"float4", FLOAT4OID, 0, 4, FLOAT4PASSBYVAL, 'i', 'p', InvalidOid,
 	F_FLOAT4IN, F_FLOAT4OUT},
-	{"name", NAMEOID, CHAROID, NAMEDATALEN, false, 'c', 'p', InvalidOid,
+	{"name", NAMEOID, CHAROID, NAMEDATALEN, false, 'c', 'p', DEFAULT_COLLATION_OID,
 	F_NAMEIN, F_NAMEOUT},
 	{"regclass", REGCLASSOID, 0, 4, true, 'i', 'p', InvalidOid,
 	F_REGCLASSIN, F_REGCLASSOUT},
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 54b3dcf..4e7e3de 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -862,7 +862,11 @@ exprCollation(const Node *expr)
 			coll = ((const MinMaxExpr *) expr)->minmaxcollid;
 			break;
 		case T_SQLValueFunction:
-			coll = InvalidOid;	/* all cases return non-collatable types */
+			/* Returns either NAME or a non-collatable type */
+			if (((const SQLValueFunction *) expr)->type == NAMEOID)
+				coll = DEFAULT_COLLATION_OID;
+			else
+				coll = InvalidOid;
 			break;
 		case T_XmlExpr:
 
@@ -1075,7 +1079,9 @@ exprSetCollation(Node *expr, Oid collation)
 			((MinMaxExpr *) expr)->minmaxcollid = collation;
 			break;
 		case T_SQLValueFunction:
-			Assert(!OidIsValid(collation)); /* no collatable results */
+			Assert((((SQLValueFunction *) expr)->type == NAMEOID) ?
+				   (collation == DEFAULT_COLLATION_OID) :
+				   (collation == InvalidOid));
 			break;
 		case T_XmlExpr:
 			Assert((((XmlExpr *) expr)->op == IS_XMLSERIALIZE) ?
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 5f46415..b6d466a 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -4344,7 +4344,7 @@ string_to_const(const char *str, Oid datatype)
 			break;
 
 		case NAMEOID:
-			collation = InvalidOid;
+			collation = DEFAULT_COLLATION_OID;
 			constlen = NAMEDATALEN;
 			break;
 
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 4ba5120..2099724 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -788,7 +788,7 @@ transformRangeTableFunc(ParseState *pstate, RangeTableFunc *rtf)
 		tf->coltypes = lappend_oid(tf->coltypes, typid);
 		tf->coltypmods = lappend_int(tf->coltypmods, typmod);
 		tf->colcollations = lappend_oid(tf->colcollations,
-										type_is_collatable(typid) ? DEFAULT_COLLATION_OID : InvalidOid);
+										get_typcollation(typid));
 
 		/* Transform the PATH and DEFAULT expressions */
 		if (rawc->colexpr)
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
index c266da2..be3c322 100644
--- a/src/backend/utils/adt/name.c
+++ b/src/backend/utils/adt/name.c
@@ -21,6 +21,7 @@
 #include "postgres.h"
 
 #include "catalog/namespace.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
@@ -28,6 +29,7 @@
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/varlena.h"
 
 
 /*****************************************************************************
@@ -113,22 +115,21 @@ namesend(PG_FUNCTION_ARGS)
 
 
 /*****************************************************************************
- *	 PUBLIC ROUTINES														 *
+ *	 COMPARISON/SORTING ROUTINES											 *
  *****************************************************************************/
 
 /*
  *		nameeq	- returns 1 iff arguments are equal
  *		namene	- returns 1 iff arguments are not equal
- *
- *		BUGS:
- *				Assumes that "xy\0\0a" should be equal to "xy\0b".
- *				If not, can do the comparison backwards for efficiency.
- *
  *		namelt	- returns 1 iff a < b
  *		namele	- returns 1 iff a <= b
  *		namegt	- returns 1 iff a > b
  *		namege	- returns 1 iff a >= b
  *
+ * Note that the use of strncmp with NAMEDATALEN limit is mostly historical;
+ * strcmp would do as well, because we do not allow NAME values that don't
+ * have a '\0' terminator.  Whatever might be past the terminator is not
+ * considered relevant to equality.
  */
 Datum
 nameeq(PG_FUNCTION_ARGS)
@@ -136,6 +137,7 @@ nameeq(PG_FUNCTION_ARGS)
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
+	/* Collation doesn't matter: equal only if bitwise-equal */
 	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0);
 }
 
@@ -145,16 +147,30 @@ namene(PG_FUNCTION_ARGS)
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
+	/* Collation doesn't matter: equal only if bitwise-equal */
 	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0);
 }
 
+static int
+namecmp(Name arg1, Name arg2, Oid collid)
+{
+	/* Fast path for common case */
+	if (collid == C_COLLATION_OID)
+		return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+
+	/* Else rely on the varstr infrastructure */
+	return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
+					  NameStr(*arg2), strlen(NameStr(*arg2)),
+					  collid);
+}
+
 Datum
 namelt(PG_FUNCTION_ARGS)
 {
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
-	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) < 0);
+	PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) < 0);
 }
 
 Datum
@@ -163,7 +179,7 @@ namele(PG_FUNCTION_ARGS)
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
-	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) <= 0);
+	PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
 }
 
 Datum
@@ -172,7 +188,7 @@ namegt(PG_FUNCTION_ARGS)
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
-	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) > 0);
+	PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) > 0);
 }
 
 Datum
@@ -181,11 +197,39 @@ namege(PG_FUNCTION_ARGS)
 	Name		arg1 = PG_GETARG_NAME(0);
 	Name		arg2 = PG_GETARG_NAME(1);
 
-	PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) >= 0);
+	PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
+}
+
+Datum
+btnamecmp(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+
+	PG_RETURN_INT32(namecmp(arg1, arg2, PG_GET_COLLATION()));
 }
 
+Datum
+btnamesortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+	Oid			collid = ssup->ssup_collation;
+	MemoryContext oldcontext;
+
+	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+	/* Use generic string SortSupport */
+	varstr_sortsupport(ssup, NAMEOID, collid);
 
-/* (see char.c for comparison/operation routines) */
+	MemoryContextSwitchTo(oldcontext);
+
+	PG_RETURN_VOID();
+}
+
+
+/*****************************************************************************
+ *	 MISCELLANEOUS PUBLIC ROUTINES											 *
+ *****************************************************************************/
 
 int
 namecpy(Name n1, const NameData *n2)
@@ -204,14 +248,6 @@ namecat(Name n1, Name n2)
 }
 #endif
 
-#ifdef NOT_USED
-int
-namecmp(Name n1, Name n2)
-{
-	return strncmp(NameStr(*n1), NameStr(*n2), NAMEDATALEN);
-}
-#endif
-
 int
 namestrcpy(Name name, const char *str)
 {
@@ -243,6 +279,12 @@ namestrcat(Name name, const char *str)
 }
 #endif
 
+/*
+ * Compare a NAME to a C string
+ *
+ * Assumes C collation always; be careful when using this for
+ * anything but equality checks!
+ */
 int
 namestrcmp(Name name, const char *str)
 {
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c3db9ea..1776ed7 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6341,23 +6341,16 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
 	char	   *workstr;
 	int			len;
 	Datum		cmpstr;
-	text	   *cmptxt = NULL;
+	char	   *cmptxt = NULL;
 	mbcharacter_incrementer charinc;
 
 	/*
 	 * Get a modifiable copy of the prefix string in C-string format, and set
 	 * up the string we will compare to as a Datum.  In C locale this can just
-	 * be the given prefix string, otherwise we need to add a suffix.  Types
-	 * NAME and BYTEA sort bytewise so they don't need a suffix either.
+	 * be the given prefix string, otherwise we need to add a suffix.  Type
+	 * BYTEA sorts bytewise so it never needs a suffix either.
 	 */
-	if (datatype == NAMEOID)
-	{
-		workstr = DatumGetCString(DirectFunctionCall1(nameout,
-													  str_const->constvalue));
-		len = strlen(workstr);
-		cmpstr = str_const->constvalue;
-	}
-	else if (datatype == BYTEAOID)
+	if (datatype == BYTEAOID)
 	{
 		bytea	   *bstr = DatumGetByteaPP(str_const->constvalue);
 
@@ -6369,7 +6362,11 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
 	}
 	else
 	{
-		workstr = TextDatumGetCString(str_const->constvalue);
+		if (datatype == NAMEOID)
+			workstr = DatumGetCString(DirectFunctionCall1(nameout,
+														  str_const->constvalue));
+		else
+			workstr = TextDatumGetCString(str_const->constvalue);
 		len = strlen(workstr);
 		if (lc_collate_is_c(collation) || len == 0)
 			cmpstr = str_const->constvalue;
@@ -6395,11 +6392,22 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
 			}
 
 			/* And build the string to compare to */
-			cmptxt = (text *) palloc(VARHDRSZ + len + 1);
-			SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
-			memcpy(VARDATA(cmptxt), workstr, len);
-			*(VARDATA(cmptxt) + len) = suffixchar;
-			cmpstr = PointerGetDatum(cmptxt);
+			if (datatype == NAMEOID)
+			{
+				cmptxt = palloc(len + 2);
+				memcpy(cmptxt, workstr, len);
+				cmptxt[len] = suffixchar;
+				cmptxt[len + 1] = '\0';
+				cmpstr = PointerGetDatum(cmptxt);
+			}
+			else
+			{
+				cmptxt = palloc(VARHDRSZ + len + 1);
+				SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
+				memcpy(VARDATA(cmptxt), workstr, len);
+				*(VARDATA(cmptxt) + len) = suffixchar;
+				cmpstr = PointerGetDatum(cmptxt);
+			}
 		}
 	}
 
@@ -6518,7 +6526,7 @@ string_to_const(const char *str, Oid datatype)
 			break;
 
 		case NAMEOID:
-			collation = InvalidOid;
+			collation = DEFAULT_COLLATION_OID;
 			constlen = NAMEDATALEN;
 			break;
 
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 8f07b1e..d5a5800 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -18,6 +18,7 @@
 #include "access/hash.h"
 #include "access/tuptoaster.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
 #include "libpq/pqformat.h"
 #include "nodes/nodeFuncs.h"
 #include "utils/array.h"
@@ -876,7 +877,7 @@ bpchar_sortsupport(PG_FUNCTION_ARGS)
 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
 	/* Use generic string SortSupport */
-	varstr_sortsupport(ssup, collid, true);
+	varstr_sortsupport(ssup, BPCHAROID, collid);
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -1085,7 +1086,7 @@ btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
 	/* Use generic string SortSupport, forcing "C" collation */
-	varstr_sortsupport(ssup, C_COLLATION_OID, true);
+	varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
 
 	MemoryContextSwitchTo(oldcontext);
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0fd3b15..a4fb588 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -69,7 +69,7 @@ typedef struct
 	int			last_returned;	/* Last comparison result (cache) */
 	bool		cache_blob;		/* Does buf2 contain strxfrm() blob, etc? */
 	bool		collate_c;
-	bool		bpchar;			/* Sorting bpchar, not varchar/text/bytea? */
+	Oid			typeid;			/* Actual datatype (text/bpchar/bytea/name) */
 	hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
 	hyperLogLogState full_card; /* Full key cardinality state */
 	double		prop_card;		/* Required cardinality proportion */
@@ -93,7 +93,10 @@ typedef struct
 
 static int	varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
 static int	bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
-static int	varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int	namefastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int	varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int	namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int	varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
 static int	varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
 static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
 static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
@@ -1814,7 +1817,7 @@ bttextsortsupport(PG_FUNCTION_ARGS)
 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
 	/* Use generic string SortSupport */
-	varstr_sortsupport(ssup, collid, false);
+	varstr_sortsupport(ssup, TEXTOID, collid);
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -1832,7 +1835,7 @@ bttextsortsupport(PG_FUNCTION_ARGS)
  * this will not work with any other collation, though.
  */
 void
-varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
+varstr_sortsupport(SortSupport ssup, Oid typeid, Oid collid)
 {
 	bool		abbreviate = ssup->abbreviate;
 	bool		collate_c = false;
@@ -1845,18 +1848,25 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 	 * overhead of a trip through the fmgr layer for every comparison, which
 	 * can be substantial.
 	 *
-	 * Most typically, we'll set the comparator to varstrfastcmp_locale, which
-	 * uses strcoll() to perform comparisons and knows about the special
-	 * requirements of BpChar callers.  However, if LC_COLLATE = C, we can
-	 * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
-	 * both of which use memcmp() rather than strcoll().
+	 * Most typically, we'll set the comparator to varlenafastcmp_locale,
+	 * which uses strcoll() to perform comparisons.  We use that for the
+	 * BpChar case too, but type NAME uses namefastcmp_locale. However, if
+	 * LC_COLLATE = C, we can make things quite a bit faster with
+	 * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
+	 * memcmp() rather than strcoll().
 	 */
 	if (lc_collate_is_c(collid))
 	{
-		if (!bpchar)
-			ssup->comparator = varstrfastcmp_c;
-		else
+		if (typeid == BPCHAROID)
 			ssup->comparator = bpcharfastcmp_c;
+		else if (typeid == NAMEOID)
+		{
+			ssup->comparator = namefastcmp_c;
+			/* Not supporting abbreviation with type NAME, for now */
+			abbreviate = false;
+		}
+		else
+			ssup->comparator = varstrfastcmp_c;
 
 		collate_c = true;
 	}
@@ -1897,7 +1907,17 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 			return;
 #endif
 
-		ssup->comparator = varstrfastcmp_locale;
+		/*
+		 * We use varlenafastcmp_locale except for type NAME.
+		 */
+		if (typeid == NAMEOID)
+		{
+			ssup->comparator = namefastcmp_locale;
+			/* Not supporting abbreviation with type NAME, for now */
+			abbreviate = false;
+		}
+		else
+			ssup->comparator = varlenafastcmp_locale;
 	}
 
 	/*
@@ -1963,7 +1983,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
 		 */
 		sss->cache_blob = true;
 		sss->collate_c = collate_c;
-		sss->bpchar = bpchar;
+		sss->typeid = typeid;
 		ssup->ssup_extra = sss;
 
 		/*
@@ -2055,17 +2075,25 @@ bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
 }
 
 /*
- * sortsupport comparison func (for locale case)
+ * sortsupport comparison func (for NAME C locale case)
+ */
+static int
+namefastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+	Name		arg1 = DatumGetName(x);
+	Name		arg2 = DatumGetName(y);
+
+	return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+}
+
+/*
+ * sortsupport comparison func (for locale case with all varlena types)
  */
 static int
-varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
+varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
 {
 	VarString  *arg1 = DatumGetVarStringPP(x);
 	VarString  *arg2 = DatumGetVarStringPP(y);
-	bool		arg1_match;
-	VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
-
-	/* working state */
 	char	   *a1p,
 			   *a2p;
 	int			len1,
@@ -2078,6 +2106,41 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	len1 = VARSIZE_ANY_EXHDR(arg1);
 	len2 = VARSIZE_ANY_EXHDR(arg2);
 
+	result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
+
+	/* We can't afford to leak memory here. */
+	if (PointerGetDatum(arg1) != x)
+		pfree(arg1);
+	if (PointerGetDatum(arg2) != y)
+		pfree(arg2);
+
+	return result;
+}
+
+/*
+ * sortsupport comparison func (for locale case with NAME type)
+ */
+static int
+namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+	Name		arg1 = DatumGetName(x);
+	Name		arg2 = DatumGetName(y);
+
+	return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
+								NameStr(*arg2), strlen(NameStr(*arg2)),
+								ssup);
+}
+
+/*
+ * sortsupport comparison func for locale cases
+ */
+static int
+varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
+{
+	VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
+	int			result;
+	bool		arg1_match;
+
 	/* Fast pre-check for equality, as discussed in varstr_cmp() */
 	if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
 	{
@@ -2094,11 +2157,10 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 		 * (not limited to padding), so we need make no distinction between
 		 * padding space characters and "real" space characters.
 		 */
-		result = 0;
-		goto done;
+		return 0;
 	}
 
-	if (sss->bpchar)
+	if (sss->typeid == BPCHAROID)
 	{
 		/* Get true number of bytes, ignoring trailing spaces */
 		len1 = bpchartruelen(a1p, len1);
@@ -2152,8 +2214,7 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	else if (arg1_match && !sss->cache_blob)
 	{
 		/* Use result cached following last actual strcoll() call */
-		result = sss->last_returned;
-		goto done;
+		return sss->last_returned;
 	}
 
 	if (sss->locale)
@@ -2222,13 +2283,6 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	/* Cache result, perhaps saving an expensive strcoll() call next time */
 	sss->cache_blob = false;
 	sss->last_returned = result;
-done:
-	/* We can't afford to leak memory here. */
-	if (PointerGetDatum(arg1) != x)
-		pfree(arg1);
-	if (PointerGetDatum(arg2) != y)
-		pfree(arg2);
-
 	return result;
 }
 
@@ -2240,7 +2294,7 @@ varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
 {
 	/*
 	 * When 0 is returned, the core system will call varstrfastcmp_c()
-	 * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale().  Even a
+	 * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale().  Even a
 	 * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
 	 * authoritatively, for the same reason that there is a strcoll()
 	 * tie-breaker call to strcmp() in varstr_cmp().
@@ -2279,7 +2333,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 	len = VARSIZE_ANY_EXHDR(authoritative);
 
 	/* Get number of bytes, ignoring trailing spaces */
-	if (sss->bpchar)
+	if (sss->typeid == BPCHAROID)
 		len = bpchartruelen(authoritative_data, len);
 
 	/*
@@ -2758,7 +2812,7 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
 	/* Use generic string SortSupport, forcing "C" collation */
-	varstr_sortsupport(ssup, C_COLLATION_OID, false);
+	varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -3798,7 +3852,7 @@ bytea_sortsupport(PG_FUNCTION_ARGS)
 	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
 	/* Use generic string SortSupport, forcing "C" collation */
-	varstr_sortsupport(ssup, C_COLLATION_OID, false);
+	varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
 
 	MemoryContextSwitchTo(oldcontext);
 
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index d295eae..a45977b 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -54,7 +54,7 @@
   typname => 'name', typlen => 'NAMEDATALEN', typbyval => 'f',
   typcategory => 'S', typelem => 'char', typinput => 'namein',
   typoutput => 'nameout', typreceive => 'namerecv', typsend => 'namesend',
-  typalign => 'c' },
+  typalign => 'c', typcollation => '100' },
 { oid => '20', array_type_oid => '1016',
   descr => '~18 digit integer, 8-byte storage',
   typname => 'int8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL',
diff --git a/src/include/utils/varlena.h b/src/include/utils/varlena.h
index c776931..ee3e205 100644
--- a/src/include/utils/varlena.h
+++ b/src/include/utils/varlena.h
@@ -17,7 +17,7 @@
 #include "utils/sortsupport.h"
 
 extern int	varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid);
-extern void varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar);
+extern void varstr_sortsupport(SortSupport ssup, Oid typeid, Oid collid);
 extern int varstr_levenshtein(const char *source, int slen,
 				   const char *target, int tlen,
 				   int ins_c, int del_c, int sub_c,
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index 8bacc74..fb3fb5b 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -607,7 +607,7 @@ do_compile(FunctionCallInfo fcinfo,
 			var = plpgsql_build_variable("tg_name", 0,
 										 plpgsql_build_datatype(NAMEOID,
 																-1,
-																InvalidOid),
+																function->fn_input_collation),
 										 true);
 			Assert(var->dtype == PLPGSQL_DTYPE_VAR);
 			var->dtype = PLPGSQL_DTYPE_PROMISE;
@@ -657,7 +657,7 @@ do_compile(FunctionCallInfo fcinfo,
 			var = plpgsql_build_variable("tg_relname", 0,
 										 plpgsql_build_datatype(NAMEOID,
 																-1,
-																InvalidOid),
+																function->fn_input_collation),
 										 true);
 			Assert(var->dtype == PLPGSQL_DTYPE_VAR);
 			var->dtype = PLPGSQL_DTYPE_PROMISE;
@@ -667,7 +667,7 @@ do_compile(FunctionCallInfo fcinfo,
 			var = plpgsql_build_variable("tg_table_name", 0,
 										 plpgsql_build_datatype(NAMEOID,
 																-1,
-																InvalidOid),
+																function->fn_input_collation),
 										 true);
 			Assert(var->dtype == PLPGSQL_DTYPE_VAR);
 			var->dtype = PLPGSQL_DTYPE_PROMISE;
@@ -677,7 +677,7 @@ do_compile(FunctionCallInfo fcinfo,
 			var = plpgsql_build_variable("tg_table_schema", 0,
 										 plpgsql_build_datatype(NAMEOID,
 																-1,
-																InvalidOid),
+																function->fn_input_collation),
 										 true);
 			Assert(var->dtype == PLPGSQL_DTYPE_VAR);
 			var->dtype = PLPGSQL_DTYPE_PROMISE;
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index f259d07..2ac8adf 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1048,7 +1048,7 @@ SELECT a.attrelid::regclass, a.attname, a.attinhcount, e.expected
   FROM (SELECT inhrelid, count(*) AS expected FROM pg_inherits
         WHERE inhparent IN (SELECT inhrelid FROM r) GROUP BY inhrelid) e
   JOIN pg_attribute a ON e.inhrelid = a.attrelid WHERE NOT attislocal
-  ORDER BY a.attrelid::regclass::name, a.attnum;
+  ORDER BY a.attrelid::regclass::name COLLATE "C", a.attnum;
  attrelid | attname | attinhcount | expected 
 ----------+---------+-------------+----------
  inht2    | aaaa    |           1 |        1
diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out
index 14fcd3b..9bebd6b 100644
--- a/src/test/regress/expected/name.out
+++ b/src/test/regress/expected/name.out
@@ -18,7 +18,7 @@ SELECT name 'name string' = name 'name string ' AS "False";
 --
 --
 --
-CREATE TABLE NAME_TBL(f1 name);
+CREATE TABLE NAME_TBL(f1 name COLLATE "C");
 INSERT INTO NAME_TBL(f1) VALUES ('1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890ABCDEFGHIJKLMNOPQR');
 INSERT INTO NAME_TBL(f1) VALUES ('1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqr');
 INSERT INTO NAME_TBL(f1) VALUES ('asdfghjkl;');
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index 425052c..4cbced5 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -348,7 +348,7 @@ SELECT a.attrelid::regclass, a.attname, a.attinhcount, e.expected
   FROM (SELECT inhrelid, count(*) AS expected FROM pg_inherits
         WHERE inhparent IN (SELECT inhrelid FROM r) GROUP BY inhrelid) e
   JOIN pg_attribute a ON e.inhrelid = a.attrelid WHERE NOT attislocal
-  ORDER BY a.attrelid::regclass::name, a.attnum;
+  ORDER BY a.attrelid::regclass::name COLLATE "C", a.attnum;
 
 DROP TABLE inht1, inhs1 CASCADE;
 
diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql
index 602bf26..7bd4b10 100644
--- a/src/test/regress/sql/name.sql
+++ b/src/test/regress/sql/name.sql
@@ -12,7 +12,7 @@ SELECT name 'name string' = name 'name string ' AS "False";
 --
 --
 
-CREATE TABLE NAME_TBL(f1 name);
+CREATE TABLE NAME_TBL(f1 name COLLATE "C");
 
 INSERT INTO NAME_TBL(f1) VALUES ('1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890ABCDEFGHIJKLMNOPQR');

Re: Collatability of type "name"

Reply via email to