On Thu, 2024-06-20 at 17:07 +0700, John Naylor wrote:
> On Sat, Jun 15, 2024 at 6:46 AM Jeff Davis <pg...@j-davis.com> wrote:
> > Attached is a patch to use simplehash.h instead, which speeds
> > things up
> > enough to make them fairly close (from around 15% slower to around
> > 8%).
> 
> +#define SH_HASH_KEY(tb, key)   hash_uint32((uint32) key)
> 
> For a static inline hash for speed reasons, we can use murmurhash32
> here, which is also inline.

Thank you, that brings it down a few more percentage points.

New patches attached, still based on the setlocale-removal patch
series.

Setup:

  create collation libc_c (provider=libc, locale='C');
  create table collation_cache_test(t text);
  insert into collation_cache_test
    select g::text||' '||g::text
      from generate_series(1,200000000) g;

Queries:

  select * from collation_cache_test where t < '0' collate "C";
  select * from collation_cache_test where t < '0' collate libc_c;

The two collations are identical except that the former benefits from
the optimization for C_COLLATION_OID, and the latter does not, so these
queries measure the overhead of the collation cache lookup.

Results (in ms):

              "C"   "libc_c"   overhead
   master:    6350     7855     24%
   v4-0001:   6091     6324      4%

(Note: I don't have an explanation for the difference in "C" locale
performance between master and v4-0001 -- probably just noise in the
test.)

Considering that simplehash brings the worst-case overhead under 5%, I
don't see a big reason to add the single-element cache as well.

Regards,
        Jeff Davis

From 64a017f169858cf646002a28f97ae05cb7ab9fcd Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 14 Jun 2024 15:38:42 -0700
Subject: [PATCH v4] Change collation cache to use simplehash.h.

---
 src/backend/utils/adt/pg_locale.c | 39 +++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 2e6f624798f..5afb69c6632 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -57,12 +57,12 @@
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
+#include "common/hashfn.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/guc_hooks.h"
-#include "utils/hsearch.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
@@ -129,10 +129,27 @@ typedef struct
 {
 	Oid			collid;			/* hash key: pg_collation OID */
 	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
-} collation_cache_entry;
 
-static HTAB *collation_cache = NULL;
+	/* needed for simplehash */
+	uint32		hash;
+	char		status;
+} collation_cache_entry;
 
+#define SH_PREFIX		collation_cache
+#define SH_ELEMENT_TYPE	collation_cache_entry
+#define SH_KEY_TYPE		Oid
+#define SH_KEY			collid
+#define SH_HASH_KEY(tb, key)   	murmurhash32((uint32) key)
+#define SH_EQUAL(tb, a, b)		(a == b)
+#define SH_GET_HASH(tb, a)		a->hash
+#define SH_SCOPE		static inline
+#define SH_STORE_HASH
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static MemoryContext CollationCacheContext = NULL;
+static collation_cache_hash *CollationCache = NULL;
 
 #if defined(WIN32) && defined(LC_MESSAGES)
 static char *IsoLocaleName(const char *);
@@ -1219,18 +1236,16 @@ lookup_collation_cache(Oid collation)
 	Assert(OidIsValid(collation));
 	Assert(collation != DEFAULT_COLLATION_OID);
 
-	if (collation_cache == NULL)
+	if (CollationCache == NULL)
 	{
-		/* First time through, initialize the hash table */
-		HASHCTL		ctl;
-
-		ctl.keysize = sizeof(Oid);
-		ctl.entrysize = sizeof(collation_cache_entry);
-		collation_cache = hash_create("Collation cache", 100, &ctl,
-									  HASH_ELEM | HASH_BLOBS);
+		CollationCacheContext = AllocSetContextCreate(TopMemoryContext,
+													  "collation cache",
+													  ALLOCSET_DEFAULT_SIZES);
+		CollationCache = collation_cache_create(
+			CollationCacheContext, 16, NULL);
 	}
 
-	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
+	cache_entry = collation_cache_insert(CollationCache, collation, &found);
 	if (!found)
 	{
 		/*
-- 
2.34.1

Reply via email to