The blog post here (thank you depesz!):

https://www.depesz.com/2024/06/11/how-much-speed-youre-leaving-at-the-table-if-you-use-default-locale/

showed an interesting result where the builtin provider is not quite as
fast as "C" for queries like:

   SELECT * FROM a WHERE t = '...';

The reason is that it's calling varstr_cmp() many times, which does a
lookup in the collation cache for each call. For sorts, it only does a
lookup in the collation cache once, so the effect is not significant.

Looking up "C" is faster because there's a special check for
C_COLLATION_OID, so it doesn't even need to do the hash lookup. If
you create an equivalent collation like:

   CREATE COLLATION libc_c(PROVIDER = libc, LOCALE = 'C');

it will perform the same as a collation with the builtin provider.

Attached is a patch to use simplehash.h instead of dynahash (hsearch.h)
for the collation cache, which speeds things up enough to make the two
fairly close (the builtin provider goes from around 15% slower than "C"
to around 8% slower).

The patch is based on the series here:

https://postgr.es/m/f1935bc481438c9d86c2e0ac537b1c110d41a00a.ca...@j-davis.com

which does some refactoring in a related area, but I can make them
independent.

We can also consider what to do about those special cases:

  * add a special case for PG_C_UTF8?
  * instead of a hardwired set of special collation IDs, have a single-
element "last collation ID" to check before doing the hash lookup?
  * remove the special cases entirely if we can close the performance
gap enough that it's not important?

(Note: the special case in lc_ctype_is_c() is currently required for
correctness because hba.c uses C_COLLATION_OID for regexes before the
syscache is initialized. That can be fixed pretty easily in a couple of
different ways, though.)

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From 777186a41955da8d05929f5c34e531dfa985b513 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 14 Jun 2024 15:38:42 -0700
Subject: [PATCH v2 7/7] Change collation cache to use simplehash.h.

---
 src/backend/utils/adt/pg_locale.c | 39 +++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 435a37a0e3..b71ca2d780 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -57,12 +57,12 @@
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
+#include "common/hashfn.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/guc_hooks.h"
-#include "utils/hsearch.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
@@ -129,10 +129,27 @@ typedef struct
 {
 	Oid			collid;			/* hash key: pg_collation OID */
 	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
-} collation_cache_entry;
 
-static HTAB *collation_cache = NULL;
+	/* needed for simplehash */
+	uint32		hash;
+	char		status;
+} collation_cache_entry;
 
+#define SH_PREFIX		collation_cache
+#define SH_ELEMENT_TYPE	collation_cache_entry
+#define SH_KEY_TYPE		Oid
+#define SH_KEY			collid
+#define SH_HASH_KEY(tb, key)   	hash_uint32((uint32) key)
+#define SH_EQUAL(tb, a, b)		(a == b)
+#define SH_GET_HASH(tb, a)		a->hash
+#define SH_SCOPE		static inline
+#define SH_STORE_HASH
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static MemoryContext CollationCacheContext = NULL;
+static collation_cache_hash *CollationCache = NULL;
 
 #if defined(WIN32) && defined(LC_MESSAGES)
 static char *IsoLocaleName(const char *);
@@ -1235,18 +1252,16 @@ lookup_collation_cache(Oid collation)
 	Assert(OidIsValid(collation));
 	Assert(collation != DEFAULT_COLLATION_OID);
 
-	if (collation_cache == NULL)
+	if (CollationCache == NULL)
 	{
-		/* First time through, initialize the hash table */
-		HASHCTL		ctl;
-
-		ctl.keysize = sizeof(Oid);
-		ctl.entrysize = sizeof(collation_cache_entry);
-		collation_cache = hash_create("Collation cache", 100, &ctl,
-									  HASH_ELEM | HASH_BLOBS);
+		CollationCacheContext = AllocSetContextCreate(TopMemoryContext,
+													  "collation cache",
+													  ALLOCSET_DEFAULT_SIZES);
+		CollationCache = collation_cache_create(
+			CollationCacheContext, 128, NULL);
 	}
 
-	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
+	cache_entry = collation_cache_insert(CollationCache, collation, &found);
 	if (!found)
 	{
 		/*
-- 
2.34.1

Reply via email to