Hi hackers,

A few years ago, there was a proposal to create hash tables for long
[sub]xip arrays in snapshots [0], but the thread seems to have fizzled out.
I was curious whether this idea still showed measurable benefits, so I
revamped the patch and ran the same test as before [1].  Here are the
results for 60â‚‹second runs on an r5d.24xlarge with the data directory on
the local NVMe storage:

     writers  HEAD  patch  diff
    ----------------------------
     16       659   664    +1%
     32       645   663    +3%
     64       659   692    +5%
     128      641   716    +12%
     256      619   610    -1%
     512      530   702    +32%
     768      469   582    +24%
     1000     367   577    +57%

As before, the hash table approach seems to provide a decent benefit at
higher client counts, so I felt it was worth reviving the idea.

The attached patch has some key differences from the previous proposal.
For example, the new patch uses simplehash instead of open-coding a new
hash table.  Also, I've bumped up the threshold for creating hash tables to
128 based on the results of my testing.  The attached patch waits until a
lookup of [sub]xip before generating the hash table, so we only need to
allocate enough space for the current elements in the [sub]xip array, and
we avoid allocating extra memory for workloads that do not need the hash
tables.  I'm slightly worried about increasing the number of memory
allocations in this code path, but the results above seemed encouraging on
that front.

Thoughts?

[0] https://postgr.es/m/35960b8af917e9268881cd8df3f88320%40postgrespro.ru
[1] https://postgr.es/m/057a9a95-19d2-05f0-17e2-f46ff20e9b3e%402ndquadrant.com

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From a7e63198030d8c77df1720a85f9eca6d1d5078b2 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathandboss...@gmail.com>
Date: Tue, 12 Jul 2022 11:39:41 -0700
Subject: [PATCH v1 1/1] Optimize lookups in snapshot "transactions in
 progress" arrays.

Presently, XidInMVCCSnapshot() performs linear searches through the
xip and subxip arrays.  This is ordinarily not too bad, but this
O(n) behavior may degrade performance for certain workloads at
higher client counts.  This change teaches XidInMVCCSnapshot() to
generate hash tables when the [sub]xip array is large, thereby
achieving O(1) lookup times at the expense of some extra memory.
These hash tables are regarded as ephemeral; they only live in
process-local memory and are never rewritten, copied, or
serialized.  This means that we only need to allocate enough room
for the current elements in [sub]xip, which should usually save
memory (but increase the number of allocations).  Another benefit
of this approach is that the hash tables are not allocated for
workloads that do not generate snapshots with long [sub]xip arrays.

A synthetic workload that generates snapshots with many transaction
IDs showed no regression in TPS at lower client counts, and it
provided over 50% improvement at higher client counts (i.e., 1000
connections).
---
 src/backend/storage/ipc/procarray.c |   7 +-
 src/backend/utils/time/snapmgr.c    | 111 ++++++++++++++++++++++------
 src/include/utils/snapshot.h        |  19 +++++
 3 files changed, 114 insertions(+), 23 deletions(-)

diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index dadaa958a8..e126eb72bb 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2544,12 +2544,15 @@ GetSnapshotData(Snapshot snapshot)
 	snapshot->curcid = GetCurrentCommandId(false);
 
 	/*
-	 * This is a new snapshot, so set both refcounts are zero, and mark it as
-	 * not copied in persistent memory.
+	 * This is a new snapshot, so set both refcounts are zero, mark it as not
+	 * copied in persistent memory, and mark the xip and subxip hash tables as
+	 * not built.
 	 */
 	snapshot->active_count = 0;
 	snapshot->regd_count = 0;
 	snapshot->copied = false;
+	snapshot->xiph = NULL;
+	snapshot->subxiph = NULL;
 
 	GetSnapshotDataInitOldSnapshot(snapshot);
 
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 5bc2a15160..c5fcfd254c 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -53,6 +53,7 @@
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/catalog.h"
+#include "common/hashfn.h"
 #include "datatype/timestamp.h"
 #include "lib/pairingheap.h"
 #include "miscadmin.h"
@@ -626,6 +627,8 @@ CopySnapshot(Snapshot snapshot)
 	newsnap->active_count = 0;
 	newsnap->copied = true;
 	newsnap->snapXactCompletionCount = 0;
+	newsnap->xiph = NULL;
+	newsnap->subxiph = NULL;
 
 	/* setup XID array */
 	if (snapshot->xcnt > 0)
@@ -667,6 +670,10 @@ FreeSnapshot(Snapshot snapshot)
 	Assert(snapshot->active_count == 0);
 	Assert(snapshot->copied);
 
+	if (snapshot->xiph)
+		xip_hash_destroy(snapshot->xiph);
+	if (snapshot->subxiph)
+		xip_hash_destroy(snapshot->subxiph);
 	pfree(snapshot);
 }
 
@@ -2233,6 +2240,8 @@ RestoreSnapshot(char *start_address)
 	snapshot->whenTaken = serialized_snapshot.whenTaken;
 	snapshot->lsn = serialized_snapshot.lsn;
 	snapshot->snapXactCompletionCount = 0;
+	snapshot->xiph = NULL;
+	snapshot->subxiph = NULL;
 
 	/* Copy XIDs, if present. */
 	if (serialized_snapshot.xcnt > 0)
@@ -2271,6 +2280,79 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
 	SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
 }
 
+/*
+ * Generation function definitions for xip and subxip hash tables.  The function
+ * protoypes and type declarations live in snapshot.h.
+ */
+#define SH_PREFIX xip_hash
+#define SH_ELEMENT_TYPE XipHashEntry
+#define SH_KEY_TYPE TransactionId
+#define SH_KEY xid
+#define SH_HASH_KEY(tb, key) murmurhash32(key)
+#define SH_EQUAL(tb, a, b) TransactionIdEquals(a, b)
+#define SH_SCOPE extern
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+/*
+ * If there are fewer than this many elements in the xip arrays in the snapshot,
+ * we won't bother creating a hash table to optimize further lookups, as it's
+ * unlikely to do much better than a linear search through the array.  It might
+ * even hurt performance.
+ *
+ * The current value worked well in testing, but it's still mostly a guessed-at
+ * number that might need updating in the future.
+ */
+#define XIP_HASH_MIN_ELEMENTS (128)
+
+/*
+ * Returns whether the xip array contains xid.
+ *
+ * If the number of elements is low, we just do a linear search through the
+ * array.  The extra overhead of doing anything fancier might be
+ * counterproductive.  If there are many elements in the array, we generate a
+ * hash table to optimize future lookups.
+ */
+static inline bool
+XidInXip(TransactionId xid, TransactionId *xip, uint32 xcnt,
+		 xip_hash_hash **xiph)
+{
+	/*
+	 * If the number of elements is low, just do a linear search through the
+	 * array.
+	 */
+	if (xcnt < XIP_HASH_MIN_ELEMENTS)
+	{
+		for (int i = 0; i < xcnt; i++)
+		{
+			if (TransactionIdEquals(xid, xip[i]))
+				return true;
+		}
+
+		return false;
+	}
+
+	/* Make sure the hash table is built. */
+	if (*xiph == NULL)
+	{
+		*xiph = xip_hash_create(TopTransactionContext, xcnt, NULL);
+
+		for (int i = 0; i < xcnt; i++)
+		{
+			bool		found;
+
+			(void) xip_hash_insert(*xiph, xip[i], &found);
+			Assert(!found);
+		}
+	}
+
+	/*
+	 * Use the hash table to see whether the xip array contains the transaction
+	 * ID.
+	 */
+	return (xip_hash_lookup(*xiph, xid) != NULL);
+}
+
 /*
  * XidInMVCCSnapshot
  *		Is the given XID still-in-progress according to the snapshot?
@@ -2284,8 +2366,6 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
 bool
 XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 {
-	uint32		i;
-
 	/*
 	 * Make a quick range check to eliminate most XIDs without looking at the
 	 * xip arrays.  Note that this is OK even if we convert a subxact XID to
@@ -2317,13 +2397,9 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 		if (!snapshot->suboverflowed)
 		{
 			/* we have full data, so search subxip */
-			int32		j;
-
-			for (j = 0; j < snapshot->subxcnt; j++)
-			{
-				if (TransactionIdEquals(xid, snapshot->subxip[j]))
-					return true;
-			}
+			if (XidInXip(xid, snapshot->subxip, snapshot->subxcnt,
+						 &snapshot->subxiph))
+				return true;
 
 			/* not there, fall through to search xip[] */
 		}
@@ -2344,16 +2420,11 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 				return false;
 		}
 
-		for (i = 0; i < snapshot->xcnt; i++)
-		{
-			if (TransactionIdEquals(xid, snapshot->xip[i]))
-				return true;
-		}
+		if (XidInXip(xid, snapshot->xip, snapshot->xcnt, &snapshot->xiph))
+			return true;
 	}
 	else
 	{
-		int32		j;
-
 		/*
 		 * In recovery we store all xids in the subxact array because it is by
 		 * far the bigger array, and we mostly don't know which xids are
@@ -2383,11 +2454,9 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 		 * indeterminate xid. We don't know whether it's top level or subxact
 		 * but it doesn't matter. If it's present, the xid is visible.
 		 */
-		for (j = 0; j < snapshot->subxcnt; j++)
-		{
-			if (TransactionIdEquals(xid, snapshot->subxip[j]))
-				return true;
-		}
+		if (XidInXip(xid, snapshot->subxip, snapshot->subxcnt,
+					 &snapshot->subxiph))
+			return true;
 	}
 
 	return false;
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 4e96f1afdc..73f2003174 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -122,6 +122,23 @@ typedef struct SnapshotData *Snapshot;
 
 #define InvalidSnapshot		((Snapshot) NULL)
 
+/*
+ * Generate function prototypes and type declarations for xip and subxip hash
+ * tables.  The function definitions live in snapmgr.c.
+ */
+typedef struct XipHashEntry
+{
+	TransactionId xid;
+	char		status;
+} XipHashEntry;
+
+#define SH_PREFIX xip_hash
+#define SH_ELEMENT_TYPE XipHashEntry
+#define SH_KEY_TYPE TransactionId
+#define SH_SCOPE extern
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
 /*
  * Struct representing all kind of possible snapshots.
  *
@@ -166,6 +183,7 @@ typedef struct SnapshotData
 	 * note: all ids in xip[] satisfy xmin <= xip[i] < xmax
 	 */
 	TransactionId *xip;
+	xip_hash_hash *xiph;		/* hash table for xip */
 	uint32		xcnt;			/* # of xact ids in xip[] */
 
 	/*
@@ -178,6 +196,7 @@ typedef struct SnapshotData
 	 * out any that are >= xmax
 	 */
 	TransactionId *subxip;
+	xip_hash_hash *subxiph;		/* hash table for subxip */
 	int32		subxcnt;		/* # of xact ids in subxip[] */
 	bool		suboverflowed;	/* has the subxip array overflowed? */
 
-- 
2.25.1

Reply via email to