Good day, all.

I did benchmark of patch on 2 socket Xeon 5220 CPU @ 2.20GHz .
I used "benchmark" used to reproduce problems with SLRU on our
customers setup.
In opposite to Shawn's tests I concentrated on bad case: a lot
of contention.

slru-funcs.sql - function definitions
  - functions creates a lot of subtrunsactions to stress subtrans
  - and select random rows for share to stress multixacts

slru-call.sql - function call for benchmark

slru-ballast.sql - randomly select 1000 consequent rows
    "for update skip locked" to stress multixacts

patch1 - make SLRU buffers configurable
patch2 - make "8-associative banks"

Benchmark done by pgbench.
Inited with scale 1 to induce contention.
    pgbench -i -s 1 testdb

Benchmark 1:
- low number of connections (50), 60% slru-call, 40% slru-ballast
    pgbench -f slru-call.sql@60 -f slru-ballast.sql@40 -c 50 -j 75 -P 1 -T 30 
testdb

version | subtrans | multixact | tps
        | buffers  | offs/memb | func+ballast
--------+----------+-----------+------
master  | 32       | 8/16      | 184+119
patch1  | 32       | 8/16      | 184+119
patch1  | 1024     | 8/16      | 121+77
patch1  | 1024     | 512/1024  | 118+75
patch2  | 32       | 8/16      | 190+122
patch2  | 1024     | 8/16      | 190+125
patch2  | 1024     | 512/1024  | 190+127

As you see, this test case degrades with dumb increase of
SLRU buffers. But use of "hash table" in form of "associative
buckets" makes performance stable.

Benchmark 2:
- high connection number (600), 98% slru-call, 2% slru-ballast
    pgbench -f slru-call.sql@98 -f slru-ballast.sql@2 -c 600 -j 75 -P 1 -T 30 
testdb

I don't paste "ballast" tps here since 2% make them too small,
and they're very noisy.

version | subtrans | multixact | tps
        | buffers  | offs/memb | func
--------+----------+-----------+------
master  | 32       | 8/16      | 13
patch1  | 32  
    | 8/16      | 13
patch1  | 1024     | 8/16      | 31
patch1  | 1024     | 512/1024  | 53
patch2  | 32       | 8/16      | 12
patch2  | 1024     | 8/16      | 34
patch2  | 1024     | 512/1024  | 67

In this case simple buffer increase does help. But "buckets"
increase performance gain.

I didn't paste here results third part of patch ("Pack SLRU...")
because I didn't see any major performance gain from it, and
it consumes large part of patch diff.

Rebased versions of first two patch parts are attached.

regards,

Yura Sokolov

Attachment: slru-ballast.sql
Description: application/sql

Attachment: slru-call.sql
Description: application/sql

Attachment: slru-func.sql
Description: application/sql

From 41ec9d1c54184c515d53ecc8021c4a998813f2a9 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amboro...@acm.org>
Date: Sun, 11 Apr 2021 21:18:10 +0300
Subject: [PATCH v21 2/2] Divide SLRU buffers into 8-associative banks

We want to eliminate linear search within SLRU buffers.
To do so we divide SLRU buffers into banks. Each bank holds
approximately 8 buffers. Each SLRU pageno may reside only in one bank.
Adjacent pagenos reside in different banks.
---
 src/backend/access/transam/slru.c | 43 ++++++++++++++++++++++++++++---
 src/include/access/slru.h         |  2 ++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index b65cb49d7ff..abc534bbd06 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -134,7 +134,7 @@ typedef enum
 static SlruErrorCause slru_errcause;
 static int	slru_errno;
 
-
+static void SlruAdjustNSlots(int* nslots, int* banksize, int* bankoffset);
 static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
 static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
 static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
@@ -148,6 +148,30 @@ static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
 									  int segpage, void *data);
 static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
 
+/*
+ * Pick bank size optimal for N-assiciative SLRU buffers.
+ * We expect bank number to be picked from lowest bits of requested pageno.
+ * Thus we want number of banks to be power of 2. This routine computes number
+ * of banks aiming to make each bank of size 8. So we can pack page number and
+ * statuses of each bank on one cacheline.
+ */
+static void SlruAdjustNSlots(int* nslots, int* banksize, int* bankoffset)
+{
+	int nbanks = 1;
+	*banksize = *nslots;
+	*bankoffset = 0;
+	while (*banksize > 15)
+	{
+		if ((*banksize & 1) != 0)
+			*banksize +=1;
+		*banksize /= 2;
+		nbanks *= 2;
+		*bankoffset += 1;
+	}
+	elog(DEBUG5, "nslots %d banksize %d nbanks %d ", *nslots, *banksize, nbanks);
+	*nslots = *banksize * nbanks;
+}
+
 /*
  * Initialization of shared memory
  */
@@ -156,6 +180,8 @@ Size
 SimpleLruShmemSize(int nslots, int nlsns)
 {
 	Size		sz;
+	int bankoffset, banksize;
+	SlruAdjustNSlots(&nslots, &banksize, &bankoffset);
 
 	/* we assume nslots isn't so large as to risk overflow */
 	sz = MAXALIGN(sizeof(SlruSharedData));
@@ -190,6 +216,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 {
 	SlruShared	shared;
 	bool		found;
+	int bankoffset, banksize;
+	SlruAdjustNSlots(&nslots, &banksize, &bankoffset);
 
 	shared = (SlruShared) ShmemInitStruct(name,
 										  SimpleLruShmemSize(nslots, nlsns),
@@ -209,6 +237,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 		shared->ControlLock = ctllock;
 
 		shared->num_slots = nslots;
+		shared->bank_mask =  (1 << bankoffset) - 1;
+		shared->bank_size = banksize;
+		
 		shared->lsn_groups_per_page = nlsns;
 
 		shared->cur_lru_count = 0;
@@ -496,12 +527,14 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
 {
 	SlruShared	shared = ctl->shared;
 	int			slotno;
+	int			bankstart = (pageno & shared->bank_mask) * shared->bank_size;
+	int			bankend = bankstart + shared->bank_size;
 
 	/* Try to find the page while holding only shared lock */
 	LWLockAcquire(shared->ControlLock, LW_SHARED);
 
 	/* See if page is already in a buffer */
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
+	for (slotno = bankstart; slotno < bankend; slotno++)
 	{
 		if (shared->page_number[slotno] == pageno &&
 			shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
@@ -1030,7 +1063,9 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		int			best_invalid_page_number = 0;	/* keep compiler quiet */
 
 		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		int bankstart = (pageno & shared->bank_mask) * shared->bank_size;
+		int bankend = bankstart + shared->bank_size;
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			if (shared->page_number[slotno] == pageno &&
 				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
@@ -1065,7 +1100,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		 * multiple pages with the same lru_count.
 		 */
 		cur_count = (shared->cur_lru_count)++;
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
+		for (slotno = bankstart; slotno < bankend; slotno++)
 		{
 			int			this_delta;
 			int			this_page_number;
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index b7e2e2b55e3..cc9cc10d271 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -61,6 +61,8 @@ typedef struct SlruSharedData
 
 	/* Number of buffers managed by this SLRU structure */
 	int			num_slots;
+	int			bank_size;
+	int			bank_mask;
 
 	/*
 	 * Arrays holding info for each buffer slot.  Page number is undefined
-- 
2.30.2

From e5f03ebac800f06f45c9ecb5f7139c96032e7fa7 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amboro...@acm.org>
Date: Mon, 15 Feb 2021 21:51:56 +0500
Subject: [PATCH v21 1/2] Make all SLRU buffer sizes configurable.

Provide new GUCs to set the number of buffers, instead of using hard
coded defaults.

Remove the limits on xact_buffers and commit_ts_buffers.  The default
sizes for those caches are ~0.2% and ~0.1% of shared_buffers, as before,
but now there is no cap at 128 and 16 buffers respectively (unless
track_commit_timestamp is disabled, in the latter case, then we might as
well keep it tiny).  Sizes much larger than the old limits have been
shown to be useful on modern systems, and an earlier commit replaced a
linear search with a hash table to avoid problems with extreme cases.

Author: Andrey M. Borodin <x4...@yandex-team.ru>
Reviewed-by: Anastasia Lubennikova <a.lubennik...@postgrespro.ru>
Reviewed-by: Tomas Vondra <tomas.von...@2ndquadrant.com>
Reviewed-by: Alexander Korotkov <aekorot...@gmail.com>
Reviewed-by: Gilles Darold <gil...@darold.net>
Reviewed-by: Thomas Munro <thomas.mu...@gmail.com>
Discussion: https://postgr.es/m/2BEC2B3F-9B61-4C1D-9FB5-5FAB0F05EF86%40yandex-team.ru
---
 doc/src/sgml/config.sgml                      | 135 ++++++++++++++++++
 src/backend/access/transam/clog.c             |  23 ++-
 src/backend/access/transam/commit_ts.c        |   5 +
 src/backend/access/transam/multixact.c        |   8 +-
 src/backend/access/transam/subtrans.c         |   5 +-
 src/backend/commands/async.c                  |   8 +-
 src/backend/storage/lmgr/predicate.c          |   4 +-
 src/backend/utils/init/globals.c              |   8 ++
 src/backend/utils/misc/guc.c                  |  99 +++++++++++++
 src/backend/utils/misc/postgresql.conf.sample |   9 ++
 src/include/access/clog.h                     |  10 ++
 src/include/access/commit_ts.h                |   1 -
 src/include/access/multixact.h                |   4 -
 src/include/access/slru.h                     |   5 +
 src/include/access/subtrans.h                 |   2 -
 src/include/commands/async.h                  |   5 -
 src/include/miscadmin.h                       |   7 +
 src/include/storage/predicate.h               |   4 -
 18 files changed, 299 insertions(+), 43 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e2d728e0c4f..991dba69757 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1961,6 +1961,141 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-multixact-offsets-buffers" xreflabel="multixact_offsets_buffers">
+      <term><varname>multixact_offsets_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>multixact_offsets_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_multixact/offsets</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>8</literal>.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-multixact-members-buffers" xreflabel="multixact_members_buffers">
+      <term><varname>multixact_members_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>multixact_members_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_multixact/members</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>16</literal>.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-subtrans-buffers" xreflabel="subtrans_buffers">
+      <term><varname>subtrans_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>subtrans_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_subtrans</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>8</literal>.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-notify-buffers" xreflabel="notify_buffers">
+      <term><varname>notify_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>notify_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_notify</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>8</literal>.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-serial-buffers" xreflabel="serial_buffers">
+      <term><varname>serial_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>serial_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_serial</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>16</literal>.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-xact-buffers" xreflabel="xact_buffers">
+      <term><varname>xact_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>xact_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of shared memory to use to cache the contents
+        of <literal>pg_xact</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>0</literal>, which requests
+        <varname>shared_buffers</varname> / 512, but not fewer than 4 blocks.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-commit-ts-buffers" xreflabel="commit_ts_buffers">
+      <term><varname>commit_ts_buffers</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>commit_ts_buffers</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the amount of memory to use to cache the cotents of
+        <literal>pg_commit_ts</literal> (see
+        <xref linkend="pgdata-contents-table"/>).
+        If this value is specified without units, it is taken as blocks,
+        that is <symbol>BLCKSZ</symbol> bytes, typically 8kB.
+        The default value is <literal>0</literal>, which requests
+        <varname>shared_buffers</varname> / 1024, but not fewer than 4 blocks.
+        This parameter can only be set at server start.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth">
       <term><varname>max_stack_depth</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 3d9088a7048..e96a75d6959 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -58,8 +58,8 @@
 
 /* We need two bits per xact, so four xacts fit in a byte */
 #define CLOG_BITS_PER_XACT	2
-#define CLOG_XACTS_PER_BYTE 4
-#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+StaticAssertDecl((CLOG_BITS_PER_XACT * CLOG_XACTS_PER_BYTE) == BITS_PER_BYTE,
+				 "CLOG_BITS_PER_XACT and CLOG_XACTS_PER_BYTE are inconsistent");
 #define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)
 
 #define TransactionIdToPage(xid)	((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
@@ -665,23 +665,16 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 /*
  * Number of shared CLOG buffers.
  *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory.  Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations.  The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
+ * By default, we'll use 2MB of for every 1GB of shared buffers, up to the
+ * theoretical maximum useful value, but always at least 4 buffers.
  */
 Size
 CLOGShmemBuffers(void)
 {
-	return Min(128, Max(4, NBuffers / 512));
+	/* Use configured value if provided. */
+	if (xact_buffers > 0)
+		return Max(4, xact_buffers);
+	return Min(CLOG_MAX_ALLOWED_BUFFERS, Max(4, NBuffers / 512));
 }
 
 /*
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 4dc8d402bd3..48ca5007475 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -514,10 +514,15 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS)
  * We use a very similar logic as for the number of CLOG buffers (except we
  * scale up twice as fast with shared buffers, and the maximum is twice as
  * high); see comments in CLOGShmemBuffers.
+ * By default, we'll use 1MB of for every 1GB of shared buffers, up to the
+ * maximum value that slru.c will allow, but always at least 4 buffers.
  */
 Size
 CommitTsShmemBuffers(void)
 {
+	/* Use configured value if provided. */
+	if (commit_ts_buffers > 0)
+		return Max(4, commit_ts_buffers);
 	return Min(256, Max(4, NBuffers / 256));
 }
 
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 8f7d12950e5..71d17e338e9 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1834,8 +1834,8 @@ MultiXactShmemSize(void)
 			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
 
 	size = SHARED_MULTIXACT_STATE_SIZE;
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(multixact_offsets_buffers, 0));
+	size = add_size(size, SimpleLruShmemSize(multixact_members_buffers, 0));
 
 	return size;
 }
@@ -1851,13 +1851,13 @@ MultiXactShmemInit(void)
 	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
 
 	SimpleLruInit(MultiXactOffsetCtl,
-				  "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,
+				  "MultiXactOffset", multixact_offsets_buffers, 0,
 				  MultiXactOffsetSLRULock, "pg_multixact/offsets",
 				  LWTRANCHE_MULTIXACTOFFSET_BUFFER,
 				  SYNC_HANDLER_MULTIXACT_OFFSET);
 	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
 	SimpleLruInit(MultiXactMemberCtl,
-				  "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,
+				  "MultiXactMember", multixact_offsets_buffers, 0,
 				  MultiXactMemberSLRULock, "pg_multixact/members",
 				  LWTRANCHE_MULTIXACTMEMBER_BUFFER,
 				  SYNC_HANDLER_MULTIXACT_MEMBER);
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 66d35481552..862b0eab966 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -31,6 +31,7 @@
 #include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
+#include "miscadmin.h"
 #include "pg_trace.h"
 #include "utils/snapmgr.h"
 
@@ -184,14 +185,14 @@ SubTransGetTopmostTransaction(TransactionId xid)
 Size
 SUBTRANSShmemSize(void)
 {
-	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
+	return SimpleLruShmemSize(subtrans_buffers, 0);
 }
 
 void
 SUBTRANSShmemInit(void)
 {
 	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
-	SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0,
+	SimpleLruInit(SubTransCtl, "Subtrans", subtrans_buffers, 0,
 				  SubtransSLRULock, "pg_subtrans",
 				  LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE);
 	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 3e1b92df030..6b980603302 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -117,7 +117,7 @@
  * frontend during startup.)  The above design guarantees that notifies from
  * other backends will never be missed by ignoring self-notifies.
  *
- * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS)
+ * The amount of shared memory used for notify management (notify_buffers)
  * can be varied without affecting anything but performance.  The maximum
  * amount of notification data that can be queued at one time is determined
  * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
@@ -235,7 +235,7 @@ typedef struct QueuePosition
  *
  * Resist the temptation to make this really large.  While that would save
  * work in some places, it would add cost in others.  In particular, this
- * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends
+ * should likely be less than notify_buffers, to ensure that backends
  * catch up before the pages they'll need to read fall out of SLRU cache.
  */
 #define QUEUE_CLEANUP_DELAY 4
@@ -521,7 +521,7 @@ AsyncShmemSize(void)
 	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
 	size = add_size(size, offsetof(AsyncQueueControl, backend));
 
-	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(notify_buffers, 0));
 
 	return size;
 }
@@ -569,7 +569,7 @@ AsyncShmemInit(void)
 	 * Set up SLRU management of the pg_notify data.
 	 */
 	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
-	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
+	SimpleLruInit(NotifyCtl, "Notify", notify_buffers, 0,
 				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
 				  SYNC_HANDLER_NONE);
 
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 5136da6ea36..54a0d66d579 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -872,7 +872,7 @@ SerialInit(void)
 	 */
 	SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
 	SimpleLruInit(SerialSlruCtl, "Serial",
-				  NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial",
+				  serial_buffers, 0, SerialSLRULock, "pg_serial",
 				  LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE);
 #ifdef USE_ASSERT_CHECKING
 	SerialPagePrecedesLogicallyUnitTests();
@@ -1396,7 +1396,7 @@ PredicateLockShmemSize(void)
 
 	/* Shared memory structures for SLRU tracking of old committed xids. */
 	size = add_size(size, sizeof(SerialControlData));
-	size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
+	size = add_size(size, SimpleLruShmemSize(serial_buffers, 0));
 
 	return size;
 }
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 1a5d29ac9ba..cc0ca91a8b3 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -151,3 +151,11 @@ int64		VacuumPageDirty = 0;
 
 int			VacuumCostBalance = 0;	/* working state for vacuum */
 bool		VacuumCostActive = false;
+
+int			multixact_offsets_buffers = 8;
+int			multixact_members_buffers = 16;
+int			subtrans_buffers = 32;
+int			notify_buffers = 8;
+int			serial_buffers = 16;
+int			xact_buffers = 0;
+int			commit_ts_buffers = 0;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index af4a1c30689..2d9121b9265 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -32,9 +32,11 @@
 #endif
 #include <unistd.h>
 
+#include "access/clog.h"
 #include "access/commit_ts.h"
 #include "access/gin.h"
 #include "access/rmgr.h"
+#include "access/slru.h"
 #include "access/tableam.h"
 #include "access/toast_compression.h"
 #include "access/transam.h"
@@ -209,6 +211,8 @@ static const char *show_tcp_keepalives_idle(void);
 static const char *show_tcp_keepalives_interval(void);
 static const char *show_tcp_keepalives_count(void);
 static const char *show_tcp_user_timeout(void);
+static const char *show_xact_buffers(void);
+static const char *show_commit_ts_buffers(void);
 static bool check_maxconnections(int *newval, void **extra, GucSource source);
 static bool check_max_worker_processes(int *newval, void **extra, GucSource source);
 static bool check_autovacuum_max_workers(int *newval, void **extra, GucSource source);
@@ -2426,6 +2430,83 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"multixact_offsets_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the MultiXact offset SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&multixact_offsets_buffers,
+		8, 2, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"multixact_members_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the MultiXact member SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&multixact_members_buffers,
+		16, 2, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"subtrans_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the sub-transaction SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&subtrans_buffers,
+		32, 2, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"notify_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the NOTIFY message SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&notify_buffers,
+		8, 2, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"serial_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the serializable transaction SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&serial_buffers,
+		16, 2, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"xact_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the number of shared memory buffers used for the transaction status SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&xact_buffers,
+		0, 0, CLOG_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, show_xact_buffers
+	},
+
+	{
+		{"commit_ts_buffers", PGC_POSTMASTER, RESOURCES_MEM,
+			gettext_noop("Sets the size of the dedicated buffer pool used for the commit timestamp SLRU cache."),
+			NULL,
+			GUC_UNIT_BLOCKS
+		},
+		&commit_ts_buffers,
+		0, 0, SLRU_MAX_ALLOWED_BUFFERS,
+		NULL, NULL, show_commit_ts_buffers
+	},
+
 	{
 		{"temp_buffers", PGC_USERSET, RESOURCES_MEM,
 			gettext_noop("Sets the maximum number of temporary buffers used by each session."),
@@ -12447,6 +12528,24 @@ show_tcp_user_timeout(void)
 	return nbuf;
 }
 
+static const char *
+show_xact_buffers(void)
+{
+	static char nbuf[16];
+
+	snprintf(nbuf, sizeof(nbuf), "%zu", CLOGShmemBuffers());
+	return nbuf;
+}
+
+static const char *
+show_commit_ts_buffers(void)
+{
+	static char nbuf[16];
+
+	snprintf(nbuf, sizeof(nbuf), "%zu", CommitTsShmemBuffers());
+	return nbuf;
+}
+
 static bool
 check_maxconnections(int *newval, void **extra, GucSource source)
 {
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b4bc06e5f5a..2cb827e1e3b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -195,6 +195,15 @@
 #old_snapshot_threshold = -1		# 1min-60d; -1 disables; 0 is immediate
 					# (change requires restart)
 
+# - SLRU Buffers (change requires restart) -
+
+#xact_buffers = 0			# memory for pg_xact (0 = auto)
+#subtrans_buffers = 32			# memory for pg_subtrans
+#multixact_offsets_buffers = 8		# memory for pg_multixact/offsets
+#multixact_members_buffers = 16		# memory for pg_multixact/members
+#notify_buffers = 8			# memory for pg_notify
+#serial_buffers = 16			# memory for pg_serial
+#commit_ts_buffers = 0			# memory for pg_commit_ts (0 = auto)
 
 #------------------------------------------------------------------------------
 # WRITE-AHEAD LOG
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 543f2e2643a..17d103aa4da 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -15,6 +15,16 @@
 #include "storage/sync.h"
 #include "lib/stringinfo.h"
 
+/*
+ * Don't allow xact_buffers to be set higher than could possibly be useful or
+ * SLRU would allow.
+ */
+#define CLOG_XACTS_PER_BYTE 4
+#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+#define CLOG_MAX_ALLOWED_BUFFERS \
+	Min(SLRU_MAX_ALLOWED_BUFFERS, \
+		(((MaxTransactionId / 2) + (CLOG_XACTS_PER_PAGE - 1)) / CLOG_XACTS_PER_PAGE))
+
 /*
  * Possible transaction statuses --- note that all-zeroes is the initial
  * state.
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index 7662f8e1a9c..d928dcc9352 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -16,7 +16,6 @@
 #include "replication/origin.h"
 #include "storage/sync.h"
 
-
 extern PGDLLIMPORT bool track_commit_timestamp;
 
 extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index a5600a320ae..da7b8f0abb9 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -29,10 +29,6 @@
 
 #define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
 
-/* Number of SLRU buffers to use for multixact */
-#define NUM_MULTIXACTOFFSET_BUFFERS		8
-#define NUM_MULTIXACTMEMBER_BUFFERS		16
-
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
  * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 130c41c8632..b7e2e2b55e3 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -17,6 +17,11 @@
 #include "storage/lwlock.h"
 #include "storage/sync.h"
 
+/*
+ * To avoid overflowing internal arithmetic and the size_t data type, the
+ * number of buffers should not exceed this number.
+ */
+#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
 
 /*
  * Define SLRU segment size.  A page is the same BLCKSZ as is used everywhere
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index f94e116640b..1ddb62883b0 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -11,8 +11,6 @@
 #ifndef SUBTRANS_H
 #define SUBTRANS_H
 
-/* Number of SLRU buffers to use for subtrans */
-#define NUM_SUBTRANS_BUFFERS	32
 
 extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 926af933d1b..402d184b9b3 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -15,11 +15,6 @@
 
 #include <signal.h>
 
-/*
- * The number of SLRU page buffers we use for the notification queue.
- */
-#define NUM_NOTIFY_BUFFERS	8
-
 extern PGDLLIMPORT bool Trace_notify;
 extern PGDLLIMPORT volatile sig_atomic_t notifyInterruptPending;
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index ea9a56d3955..e65990e04e8 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -177,6 +177,13 @@ extern PGDLLIMPORT int MaxBackends;
 extern PGDLLIMPORT int MaxConnections;
 extern PGDLLIMPORT int max_worker_processes;
 extern PGDLLIMPORT int max_parallel_workers;
+extern PGDLLIMPORT int multixact_offsets_buffers;
+extern PGDLLIMPORT int multixact_members_buffers;
+extern PGDLLIMPORT int subtrans_buffers;
+extern PGDLLIMPORT int notify_buffers;
+extern PGDLLIMPORT int serial_buffers;
+extern PGDLLIMPORT int xact_buffers;
+extern PGDLLIMPORT int commit_ts_buffers;
 
 extern PGDLLIMPORT int MyProcPid;
 extern PGDLLIMPORT pg_time_t MyStartTime;
diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h
index 8dfcb3944b4..1e3f757ec30 100644
--- a/src/include/storage/predicate.h
+++ b/src/include/storage/predicate.h
@@ -26,10 +26,6 @@ extern PGDLLIMPORT int max_predicate_locks_per_xact;
 extern PGDLLIMPORT int max_predicate_locks_per_relation;
 extern PGDLLIMPORT int max_predicate_locks_per_page;
 
-
-/* Number of SLRU buffers to use for Serial SLRU */
-#define NUM_SERIAL_BUFFERS		16
-
 /*
  * A handle used for sharing SERIALIZABLEXACT objects between the participants
  * in a parallel query.
-- 
2.30.2

Reply via email to