On Thu, Mar 25, 2021 at 10:31 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> We already know that increasing the number of CLOG buffers above the
> current number hurts as the linear search begins to dominate
> (according to the commit message for 5364b357), and it doesn't seem
> great to ship a new feature that melts your CPU when you turn it up.
> Perhaps, to ship this, we need to introduce a buffer mapping table?  I
> have attached a "one coffee" attempt at that, on top of your v10 patch
> (unmodified), for discussion.  It survives basic testing but I don't
> know how it performs.

Hrrr... Cfbot showed an assertion failure.  Here's the two coffee
version with a couple of silly mistakes fixed.
From 4817d16cfb6704d43a7bef12648e753d239c809c Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amboro...@acm.org>
Date: Mon, 15 Feb 2021 21:51:56 +0500
Subject: [PATCH v11 1/2] Make all SLRU buffer sizes configurable

---
 doc/src/sgml/config.sgml                      | 108 ++++++++++++++++++
 src/backend/access/transam/clog.c             |   6 +
 src/backend/access/transam/commit_ts.c        |   5 +-
 src/backend/access/transam/multixact.c        |   8 +-
 src/backend/access/transam/subtrans.c         |   5 +-
 src/backend/commands/async.c                  |   8 +-
 src/backend/storage/lmgr/predicate.c          |   4 +-
 src/backend/utils/init/globals.c              |   8 ++
 src/backend/utils/misc/guc.c                  |  77 +++++++++++++
 src/backend/utils/misc/postgresql.conf.sample |  16 +++
 src/include/access/multixact.h                |   4 -
 src/include/access/subtrans.h                 |   3 -
 src/include/commands/async.h                  |   5 -
 src/include/miscadmin.h                       |   8 ++
 src/include/storage/predicate.h               |   4 -
 15 files changed, 240 insertions(+), 29 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ddc6d789d8..0adcf0efaf 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1886,6 +1886,114 @@ include_dir 'conf.d'
        </para>
       </listitem>
      </varlistentry>
+     
+    <varlistentry id="guc-multixact-offsets-slru-buffers" xreflabel="multixact_offsets_slru_buffers">
+      <term><varname>multixact_offsets_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>multixact_offsets_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for MultiXact offsets. MultiXact offsets
+        are used to store information about offsets of multiple row lockers (caused by SELECT FOR UPDATE and others).
+        It defaults to 64 kilobytes (<literal>64KB</literal>).
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    <varlistentry id="guc-multixact-members-slru-buffers" xreflabel="multixact_members_slru_buffers">
+      <term><varname>multixact_members_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>multixact_members_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for MultiXact members. MultiXact members
+        are used to store information about XIDs of multiple row lockers. Typically <varname>multixact_members_slru_buffers</varname>
+        is twice more than <varname>multixact_offsets_slru_buffers</varname>.
+        It defaults to 128 kilobytes (<literal>128KB</literal>).
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+    <varlistentry id="guc-subtrans-buffers" xreflabel="subtrans_slru_buffers">
+      <term><varname>subtrans_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>subtrans_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for subtransactions.
+        It defaults to 256 kilobytes (<literal>256KB</literal>).
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+    <varlistentry id="guc-notify-buffers" xreflabel="notify_slru_buffers">
+      <term><varname>notify_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>notify_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for asyncronous notifications (NOTIFY, LISTEN).
+        It defaults to 64 kilobytes (<literal>64KB</literal>).
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+    <varlistentry id="guc-serial-buffers" xreflabel="serial_slru_buffers">
+      <term><varname>serial_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>serial_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for predicate locks.
+        It defaults to 128 kilobytes (<literal>128KB</literal>).
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+    <varlistentry id="guc-clog-buffers" xreflabel="clog_slru_buffers">
+      <term><varname>clog_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>clog_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for CLOG.
+        It defaults to 0, in this case CLOG size is taken as <varname>shared_buffers</varname> / 512.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+    <varlistentry id="guc-commit-ts-buffers" xreflabel="commit_ts_slru_buffers">
+      <term><varname>commit_ts_slru_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>commit_ts_slru_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to be used for commit timestamps.
+        It defaults to 0, in this case CLOG size is taken as <varname>shared_buffers</varname> / 512.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
 
      <varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
       <term><varname>max_stack_depth</varname> (<type>integer</type>)
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 6fa4713fb4..e1d34aa361 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -659,6 +659,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 /*
  * Number of shared CLOG buffers.
  *
+ * If values is configured via GUC - just use given value. Otherwise
+ * apply following euristics.
+ *
  * On larger multi-processor systems, it is possible to have many CLOG page
  * requests in flight at one time which could lead to disk access for CLOG
  * page if the required page is not found in memory.  Testing revealed that we
@@ -675,6 +678,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 Size
 CLOGShmemBuffers(void)
 {
+	/* consider 0 and 1 as unset GUC */
+	if (clog_slru_buffers > 1)
+		return clog_slru_buffers;
 	return Min(128, Max(4, NBuffers / 512));
 }
 
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 48e8d66286..7de3bca63d 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -530,7 +530,10 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS)
 Size
 CommitTsShmemBuffers(void)
 {
-	return Min(16, Max(4, NBuffers / 1024));
+	/* consider 0 and 1 as unset GUC */
+	if (commit_ts_slru_buffers > 1)
+		return commit_ts_slru_buffers;
+	return Min(16, Max(4, NBuffers / 512));
 }
 
 /*
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 1f9f1a1fa1..370c01e72b 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1831,8 +1831,8 @@ MultiXactShmemSize(void)
 			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
 
 	size = SHARED_MULTIXACT_STATE_SIZE;
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(multixact_offsets_slru_buffers, 0));
+	size = add_size(size, SimpleLruShmemSize(multixact_members_slru_buffers, 0));
 
 	return size;
 }
@@ -1848,13 +1848,13 @@ MultiXactShmemInit(void)
 	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
 
 	SimpleLruInit(MultiXactOffsetCtl,
-				  "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,
+				  "MultiXactOffset", multixact_offsets_slru_buffers, 0,
 				  MultiXactOffsetSLRULock, "pg_multixact/offsets",
 				  LWTRANCHE_MULTIXACTOFFSET_BUFFER,
 				  SYNC_HANDLER_MULTIXACT_OFFSET);
 	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
 	SimpleLruInit(MultiXactMemberCtl,
-				  "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,
+				  "MultiXactMember", multixact_offsets_slru_buffers, 0,
 				  MultiXactMemberSLRULock, "pg_multixact/members",
 				  LWTRANCHE_MULTIXACTMEMBER_BUFFER,
 				  SYNC_HANDLER_MULTIXACT_MEMBER);
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 6a8e521f89..0c24353d3a 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -31,6 +31,7 @@
 #include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
+#include "miscadmin.h"
 #include "pg_trace.h"
 #include "utils/snapmgr.h"
 
@@ -184,14 +185,14 @@ SubTransGetTopmostTransaction(TransactionId xid)
 Size
 SUBTRANSShmemSize(void)
 {
-	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
+	return SimpleLruShmemSize(subtrans_slru_buffers, 0);
 }
 
 void
 SUBTRANSShmemInit(void)
 {
 	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
-	SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0,
+	SimpleLruInit(SubTransCtl, "Subtrans", subtrans_slru_buffers, 0,
 				  SubtransSLRULock, "pg_subtrans",
 				  LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE);
 	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 4b16fb5682..f5c5592057 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -107,7 +107,7 @@
  * frontend during startup.)  The above design guarantees that notifies from
  * other backends will never be missed by ignoring self-notifies.
  *
- * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS)
+ * The amount of shared memory used for notify management (notify_slru_buffers)
  * can be varied without affecting anything but performance.  The maximum
  * amount of notification data that can be queued at one time is determined
  * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
@@ -225,7 +225,7 @@ typedef struct QueuePosition
  *
  * Resist the temptation to make this really large.  While that would save
  * work in some places, it would add cost in others.  In particular, this
- * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends
+ * should likely be less than notify_slru_buffers, to ensure that backends
  * catch up before the pages they'll need to read fall out of SLRU cache.
  */
 #define QUEUE_CLEANUP_DELAY 4
@@ -514,7 +514,7 @@ AsyncShmemSize(void)
 	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
 	size = add_size(size, offsetof(AsyncQueueControl, backend));
 
-	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(notify_slru_buffers, 0));
 
 	return size;
 }
@@ -562,7 +562,7 @@ AsyncShmemInit(void)
 	 * Set up SLRU management of the pg_notify data.
 	 */
 	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
-	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+	SimpleLruInit(NotifyCtl, "Notify", notify_slru_buffers, 0,
 				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
 				  SYNC_HANDLER_NONE);
 
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d493aeef0f..fad8cc572e 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -872,7 +872,7 @@ SerialInit(void)
 	 */
 	SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
 	SimpleLruInit(SerialSlruCtl, "Serial",
-				  NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial",
+				  serial_slru_buffers, 0, SerialSLRULock, "pg_serial",
 				  LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE);
 #ifdef USE_ASSERT_CHECKING
 	SerialPagePrecedesLogicallyUnitTests();
@@ -1395,7 +1395,7 @@ PredicateLockShmemSize(void)
 
 	/* Shared memory structures for SLRU tracking of old committed xids. */
 	size = add_size(size, sizeof(SerialControlData));
-	size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(serial_slru_buffers, 0));
 
 	return size;
 }
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 73e0a672ae..f163ca17e9 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -148,3 +148,11 @@ int64		VacuumPageDirty = 0;
 
 int			VacuumCostBalance = 0;	/* working state for vacuum */
 bool		VacuumCostActive = false;
+
+int			multixact_offsets_slru_buffers = 8;
+int			multixact_members_slru_buffers = 16;
+int			subtrans_slru_buffers = 32;
+int			notify_slru_buffers = 8;
+int			serial_slru_buffers = 16;
+int			clog_slru_buffers = 0;
+int			commit_ts_slru_buffers = 0;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 0c5dc4d3e8..b65a4ae9ce 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2305,6 +2305,83 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"multixact_offsets_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for MultiXact offsets SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&multixact_offsets_slru_buffers,
+		8, 2, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"multixact_members_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for MultiXact members SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&multixact_members_slru_buffers,
+		16, 2, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"subtrans_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for substransactions SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&subtrans_slru_buffers,
+		32, 2, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"notify_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for asyncronous notifications SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&notify_slru_buffers,
+		8, 2, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"serial_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for predicate locks SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&serial_slru_buffers,
+		16, 2, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"clog_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for commit log SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&serial_slru_buffers,
+		0, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"commit_ts_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for commit timestamps SLRU."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&commit_ts_slru_buffers,
+		0, 0, INT_MAX / 2,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
 			gettext_noop("Sets the maximum number of temporary buffers used by each session."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b234a6bfe6..308fd565d3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -190,6 +190,22 @@
 					# (change requires restart)
 #backend_flush_after = 0		# measured in pages, 0 disables
 
+# - SLRU Buffers -
+
+#multixact_offsets_slru_buffers = 8	# memory used for MultiXact offsets
+					# (change requires restart)
+#multixact_members_slru_buffers = 16	# memory used for MultiXact members
+					# (change requires restart)
+#subtrans_slru_buffers = 32			# memory used for subtransactions
+					# (change requires restart)
+#notify_slru_buffers = 8			# memory used for asynchronous notifications
+					# (change requires restart)
+#serial_slru_buffers = 16			# memory used for predicate locks
+					# (change requires restart)
+#clog_slru_buffers = 0				# memory used for CLOG
+					# (change requires restart)
+#commit_ts_slru_buffers = 0			# memory used for commit timestamps
+					# (change requires restart)
 
 #------------------------------------------------------------------------------
 # WRITE-AHEAD LOG
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4bbb035eae..97c0a46376 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -29,10 +29,6 @@
 
 #define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
 
-/* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS		8
-#define NUM_MULTIXACTMEMBER_BUFFERS		16
-
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
  * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index d0ab44ae82..ca0999056e 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -11,9 +11,6 @@
 #ifndef SUBTRANS_H
 #define SUBTRANS_H
 
-/* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS	32
-
 extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
 extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 9217f66b91..fa831e3721 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -15,11 +15,6 @@
 
 #include <signal.h>
 
-/*
- * The number of SLRU page buffers we use for the notification queue.
- */
-#define NUM_NOTIFY_BUFFERS	8
-
 extern bool Trace_notify;
 extern volatile sig_atomic_t notifyInterruptPending;
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 013850ac28..3d9f585fb9 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -162,6 +162,14 @@ extern PGDLLIMPORT int MaxBackends;
 extern PGDLLIMPORT int MaxConnections;
 extern PGDLLIMPORT int max_worker_processes;
 extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int multixact_offsets_slru_buffers;
+extern PGDLLIMPORT int multixact_members_slru_buffers;
+extern PGDLLIMPORT int multixact_members_slru_buffers;
+extern PGDLLIMPORT int subtrans_slru_buffers;
+extern PGDLLIMPORT int notify_slru_buffers;
+extern PGDLLIMPORT int serial_slru_buffers;
+extern PGDLLIMPORT int clog_slru_buffers;
+extern PGDLLIMPORT int commit_ts_slru_buffers;
 
 extern PGDLLIMPORT int MyProcPid;
 extern PGDLLIMPORT pg_time_t MyStartTime;
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
index 152b698611..c72779bd88 100644
--- a/src/include/storage/predicate.h
+++ b/src/include/storage/predicate.h
@@ -26,10 +26,6 @@ extern int	max_predicate_locks_per_xact;
 extern int	max_predicate_locks_per_relation;
 extern int	max_predicate_locks_per_page;
 
-
-/* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS		16
-
 /*
  * A handle used for sharing SERIALIZABLEXACT objects between the participants
  * in a parallel query.
-- 
2.30.1

From 65600b53939c34abf43e62f3f59be5671c43d301 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 25 Mar 2021 10:11:31 +1300
Subject: [PATCH v11 2/2] Add buffer mapping table for SLRUs.

---
 src/backend/access/transam/slru.c | 87 ++++++++++++++++++++++++++++---
 src/include/access/slru.h         |  2 +
 2 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 82149ad782..487585bb60 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -58,6 +58,8 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "storage/shmem.h"
+#include "utils/dynahash.h"
+#include "utils/hsearch.h"
 
 #define SlruFileName(ctl, path, seg) \
 	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
@@ -79,6 +81,12 @@ typedef struct SlruWriteAllData
 
 typedef struct SlruWriteAllData *SlruWriteAll;
 
+typedef struct SlruMappingTableEntry
+{
+	int			pageno;
+	int			slotno;
+} SlruMappingTableEntry;
+
 /*
  * Populate a file tag describing a segment file.  We only use the segment
  * number, since we can derive everything else we need by having separate
@@ -146,6 +154,9 @@ static int	SlruSelectLRUPage(SlruCtl ctl, int pageno);
 static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
 									  int segpage, void *data);
 static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
+static void	SlruMappingAdd(SlruCtl ctl, int pageno, int slotno);
+static void	SlruMappingRemove(SlruCtl ctl, int pageno);
+static int	SlruMappingFind(SlruCtl ctl, int pageno);
 
 /*
  * Initialization of shared memory
@@ -168,7 +179,8 @@ SimpleLruShmemSize(int nslots, int nlsns)
 	if (nlsns > 0)
 		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
 
-	return BUFFERALIGN(sz) + BLCKSZ * nslots;
+	return BUFFERALIGN(sz) + BLCKSZ * nslots +
+		hash_estimate_size(nslots, sizeof(SlruMappingTableEntry));
 }
 
 /*
@@ -187,6 +199,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 			  LWLock *ctllock, const char *subdir, int tranche_id,
 			  SyncRequestHandler sync_handler)
 {
+	char		mapping_table_name[SHMEM_INDEX_KEYSIZE];
+	HASHCTL		mapping_table_info;
+	HTAB	   *mapping_table;
 	SlruShared	shared;
 	bool		found;
 
@@ -258,11 +273,21 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 	else
 		Assert(found);
 
+	/* Create or find the buffer mapping table. */
+	memset(&mapping_table_info, 0, sizeof(mapping_table_info));
+	mapping_table_info.keysize = sizeof(int);
+	mapping_table_info.entrysize = sizeof(SlruMappingTableEntry);
+	snprintf(mapping_table_name, sizeof(mapping_table_name),
+			 "%s Mapping Table", name);
+	mapping_table = ShmemInitHash(mapping_table_name, nslots, nslots,
+								  &mapping_table_info, HASH_ELEM | HASH_BLOBS);
+
 	/*
 	 * Initialize the unshared control struct, including directory path. We
 	 * assume caller set PagePrecedes.
 	 */
 	ctl->shared = shared;
+	ctl->mapping_table = mapping_table;
 	ctl->sync_handler = sync_handler;
 	strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
 }
@@ -289,6 +314,9 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno)
 		   shared->page_number[slotno] == pageno);
 
 	/* Mark the slot as containing this page */
+	if (shared->page_status[slotno] != SLRU_PAGE_EMPTY)
+		SlruMappingRemove(ctl, shared->page_number[slotno]);
+	SlruMappingAdd(ctl, pageno, slotno);
 	shared->page_number[slotno] = pageno;
 	shared->page_status[slotno] = SLRU_PAGE_VALID;
 	shared->page_dirty[slotno] = true;
@@ -362,7 +390,10 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno)
 		{
 			/* indeed, the I/O must have failed */
 			if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
+			{
+				SlruMappingRemove(ctl, shared->page_number[slotno]);
 				shared->page_status[slotno] = SLRU_PAGE_EMPTY;
+			}
 			else				/* write_in_progress */
 			{
 				shared->page_status[slotno] = SLRU_PAGE_VALID;
@@ -436,6 +467,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 				!shared->page_dirty[slotno]));
 
 		/* Mark the slot read-busy */
+		SlruMappingAdd(ctl, pageno, slotno);
 		shared->page_number[slotno] = pageno;
 		shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
 		shared->page_dirty[slotno] = false;
@@ -459,7 +491,13 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 			   shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
 			   !shared->page_dirty[slotno]);
 
-		shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
+		if (ok)
+			shared->page_status[slotno] = SLRU_PAGE_VALID;
+		else
+		{
+			SlruMappingRemove(ctl, pageno);
+			shared->page_status[slotno] =  SLRU_PAGE_EMPTY;
+		}
 
 		LWLockRelease(&shared->buffer_locks[slotno].lock);
 
@@ -1029,11 +1067,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		int			best_invalid_page_number = 0;	/* keep compiler quiet */
 
 		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		slotno = SlruMappingFind(ctl, pageno);
+		if (slotno >= 0)
 		{
-			if (shared->page_number[slotno] == pageno &&
-				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
-				return slotno;
+			Assert(shared->page_number[slotno] == pageno);
+			Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY);
+			return slotno;
 		}
 
 		/*
@@ -1266,6 +1305,7 @@ restart:;
 		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
 			!shared->page_dirty[slotno])
 		{
+			SlruMappingRemove(ctl, shared->page_number[slotno]);
 			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
 			continue;
 		}
@@ -1348,6 +1388,7 @@ restart:
 		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
 			!shared->page_dirty[slotno])
 		{
+			SlruMappingRemove(ctl, shared->page_number[slotno]);
 			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
 			continue;
 		}
@@ -1609,3 +1650,37 @@ SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
 	errno = save_errno;
 	return result;
 }
+
+static int
+SlruMappingFind(SlruCtl ctl, int pageno)
+{
+	SlruMappingTableEntry *mapping;
+
+	mapping = hash_search(ctl->mapping_table, &pageno, HASH_FIND, NULL);
+	if (mapping)
+		return mapping->slotno;
+
+	return -1;
+}
+
+static void
+SlruMappingAdd(SlruCtl ctl, int pageno, int slotno)
+{
+	SlruMappingTableEntry *mapping;
+	bool		found PG_USED_FOR_ASSERTS_ONLY;
+
+	mapping = hash_search(ctl->mapping_table, &pageno, HASH_ENTER, &found);
+	mapping->slotno = slotno;
+
+	Assert(!found);
+}
+
+static void
+SlruMappingRemove(SlruCtl ctl, int pageno)
+{
+	bool		found PG_USED_FOR_ASSERTS_ONLY;
+
+	hash_search(ctl->mapping_table, &pageno, HASH_REMOVE, &found);
+
+	Assert(found);
+}
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index dd52e8cec7..8aa3efc0ee 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -16,6 +16,7 @@
 #include "access/xlogdefs.h"
 #include "storage/lwlock.h"
 #include "storage/sync.h"
+#include "utils/hsearch.h"
 
 
 /*
@@ -110,6 +111,7 @@ typedef SlruSharedData *SlruShared;
 typedef struct SlruCtlData
 {
 	SlruShared	shared;
+	HTAB	   *mapping_table;
 
 	/*
 	 * Which sync handler function to use when handing sync requests over to
-- 
2.30.1

Reply via email to