On Thu, Mar 25, 2021 at 10:31 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> We already know that increasing the number of CLOG buffers above the
> current number hurts as the linear search begins to dominate
> (according to the commit message for 5364b357), and it doesn't seem
> great to ship a new feature that melts your CPU when you turn it up.
> Perhaps, to ship this, we need to introduce a buffer mapping table? I
> have attached a "one coffee" attempt at that, on top of your v10 patch
> (unmodified), for discussion. It survives basic testing but I don't
> know how it performs.
Hrrr... cfbot showed an assertion failure. Here's the "two coffee" version, with a couple of silly mistakes fixed.
From 4817d16cfb6704d43a7bef12648e753d239c809c Mon Sep 17 00:00:00 2001 From: Andrey Borodin <amboro...@acm.org> Date: Mon, 15 Feb 2021 21:51:56 +0500 Subject: [PATCH v11 1/2] Make all SLRU buffer sizes configurable --- doc/src/sgml/config.sgml | 108 ++++++++++++++++++ src/backend/access/transam/clog.c | 6 + src/backend/access/transam/commit_ts.c | 5 +- src/backend/access/transam/multixact.c | 8 +- src/backend/access/transam/subtrans.c | 5 +- src/backend/commands/async.c | 8 +- src/backend/storage/lmgr/predicate.c | 4 +- src/backend/utils/init/globals.c | 8 ++ src/backend/utils/misc/guc.c | 77 +++++++++++++ src/backend/utils/misc/postgresql.conf.sample | 16 +++ src/include/access/multixact.h | 4 - src/include/access/subtrans.h | 3 - src/include/commands/async.h | 5 - src/include/miscadmin.h | 8 ++ src/include/storage/predicate.h | 4 - 15 files changed, 240 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index ddc6d789d8..0adcf0efaf 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1886,6 +1886,114 @@ include_dir 'conf.d' </para> </listitem> </varlistentry> + + <varlistentry id="guc-multixact-offsets-slru-buffers" xreflabel="multixact_offsets_slru_buffers"> + <term><varname>multixact_offsets_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>multixact_offsets_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for MultiXact offsets. MultiXact offsets + are used to store information about offsets of multiple row lockers (caused by SELECT FOR UPDATE and others). + It defaults to 64 kilobytes (<literal>64KB</literal>). + This parameter can only be set at server start. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-multixact-members-slru-buffers" xreflabel="multixact_members_slru_buffers"> + <term><varname>multixact_members_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>multixact_members_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for MultiXact members. MultiXact members + are used to store information about XIDs of multiple row lockers. Typically <varname>multixact_members_slru_buffers</varname> + is twice as large as <varname>multixact_offsets_slru_buffers</varname>. + It defaults to 128 kilobytes (<literal>128KB</literal>). + This parameter can only be set at server start. + </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-subtrans-buffers" xreflabel="subtrans_slru_buffers"> + <term><varname>subtrans_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>subtrans_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for subtransactions. + It defaults to 256 kilobytes (<literal>256KB</literal>). + This parameter can only be set at server start. + </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-notify-buffers" xreflabel="notify_slru_buffers"> + <term><varname>notify_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>notify_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for asynchronous notifications (NOTIFY, LISTEN). + It defaults to 64 kilobytes (<literal>64KB</literal>). + This parameter can only be set at server start. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-serial-buffers" xreflabel="serial_slru_buffers"> + <term><varname>serial_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>serial_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for predicate locks. + It defaults to 128 kilobytes (<literal>128KB</literal>). + This parameter can only be set at server start. + </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-clog-buffers" xreflabel="clog_slru_buffers"> + <term><varname>clog_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>clog_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for CLOG. + It defaults to 0; in this case the CLOG size is taken as <varname>shared_buffers</varname> / 512. + This parameter can only be set at server start. + </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-commit-ts-buffers" xreflabel="commit_ts_slru_buffers"> + <term><varname>commit_ts_slru_buffers</varname> (<type>integer</type>) + <indexterm> + <primary><varname>commit_ts_slru_buffers</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the amount of memory to be used for commit timestamps. + It defaults to 0; in this case the commit timestamp buffer size is taken as <varname>shared_buffers</varname> / 512. + This parameter can only be set at server start. 
+ </para> + </listitem> + </varlistentry> <varlistentry id="guc-max-stack-depth" xreflabel="max_stack_depth"> <term><varname>max_stack_depth</varname> (<type>integer</type>) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 6fa4713fb4..e1d34aa361 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -659,6 +659,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) /* * Number of shared CLOG buffers. * + * If the value is configured via GUC, just use the given value. Otherwise + * apply the following heuristics. + * * On larger multi-processor systems, it is possible to have many CLOG page * requests in flight at one time which could lead to disk access for CLOG * page if the required page is not found in memory. Testing revealed that we @@ -675,6 +678,9 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) Size CLOGShmemBuffers(void) { + /* consider 0 and 1 as unset GUC */ + if (clog_slru_buffers > 1) + return clog_slru_buffers; return Min(128, Max(4, NBuffers / 512)); } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 48e8d66286..7de3bca63d 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -530,7 +530,10 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS) Size CommitTsShmemBuffers(void) { - return Min(16, Max(4, NBuffers / 1024)); + /* consider 0 and 1 as unset GUC */ + if (commit_ts_slru_buffers > 1) + return commit_ts_slru_buffers; + return Min(16, Max(4, NBuffers / 512)); } /* diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 1f9f1a1fa1..370c01e72b 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1831,8 +1831,8 @@ MultiXactShmemSize(void) mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot)) size = SHARED_MULTIXACT_STATE_SIZE; - size = add_size(size, 
SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0)); - size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0)); + size = add_size(size, SimpleLruShmemSize(multixact_offsets_slru_buffers, 0)); + size = add_size(size, SimpleLruShmemSize(multixact_members_slru_buffers, 0)); return size; } @@ -1848,13 +1848,13 @@ MultiXactShmemInit(void) MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes; SimpleLruInit(MultiXactOffsetCtl, - "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0, + "MultiXactOffset", multixact_offsets_slru_buffers, 0, MultiXactOffsetSLRULock, "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER, SYNC_HANDLER_MULTIXACT_OFFSET); SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE); SimpleLruInit(MultiXactMemberCtl, - "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0, + "MultiXactMember", multixact_members_slru_buffers, 0, MultiXactMemberSLRULock, "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER, SYNC_HANDLER_MULTIXACT_MEMBER); diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 6a8e521f89..0c24353d3a 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -31,6 +31,7 @@ #include "access/slru.h" #include "access/subtrans.h" #include "access/transam.h" +#include "miscadmin.h" #include "pg_trace.h" #include "utils/snapmgr.h" @@ -184,14 +185,14 @@ SubTransGetTopmostTransaction(TransactionId xid) Size SUBTRANSShmemSize(void) { - return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0); + return SimpleLruShmemSize(subtrans_slru_buffers, 0); } void SUBTRANSShmemInit(void) { SubTransCtl->PagePrecedes = SubTransPagePrecedes; - SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0, + SimpleLruInit(SubTransCtl, "Subtrans", subtrans_slru_buffers, 0, SubtransSLRULock, "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE); SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE); diff --git 
a/src/backend/commands/async.c b/src/backend/commands/async.c index 4b16fb5682..f5c5592057 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -107,7 +107,7 @@ * frontend during startup.) The above design guarantees that notifies from * other backends will never be missed by ignoring self-notifies. * - * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS) + * The amount of shared memory used for notify management (notify_slru_buffers) * can be varied without affecting anything but performance. The maximum * amount of notification data that can be queued at one time is determined * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below. @@ -225,7 +225,7 @@ typedef struct QueuePosition * * Resist the temptation to make this really large. While that would save * work in some places, it would add cost in others. In particular, this - * should likely be less than NUM_NOTIFY_BUFFERS, to ensure that backends + * should likely be less than notify_slru_buffers, to ensure that backends * catch up before the pages they'll need to read fall out of SLRU cache. */ #define QUEUE_CLEANUP_DELAY 4 @@ -514,7 +514,7 @@ AsyncShmemSize(void) size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus)); size = add_size(size, offsetof(AsyncQueueControl, backend)); - size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0)); + size = add_size(size, SimpleLruShmemSize(notify_slru_buffers, 0)); return size; } @@ -562,7 +562,7 @@ AsyncShmemInit(void) * Set up SLRU management of the pg_notify data. 
*/ NotifyCtl->PagePrecedes = asyncQueuePagePrecedes; - SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0, + SimpleLruInit(NotifyCtl, "Notify", notify_slru_buffers, 0, NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER, SYNC_HANDLER_NONE); diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index d493aeef0f..fad8cc572e 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -872,7 +872,7 @@ SerialInit(void) */ SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically; SimpleLruInit(SerialSlruCtl, "Serial", - NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial", + serial_slru_buffers, 0, SerialSLRULock, "pg_serial", LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE); #ifdef USE_ASSERT_CHECKING SerialPagePrecedesLogicallyUnitTests(); @@ -1395,7 +1395,7 @@ PredicateLockShmemSize(void) /* Shared memory structures for SLRU tracking of old committed xids. */ size = add_size(size, sizeof(SerialControlData)); - size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0)); + size = add_size(size, SimpleLruShmemSize(serial_slru_buffers, 0)); return size; } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 73e0a672ae..f163ca17e9 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -148,3 +148,11 @@ int64 VacuumPageDirty = 0; int VacuumCostBalance = 0; /* working state for vacuum */ bool VacuumCostActive = false; + +int multixact_offsets_slru_buffers = 8; +int multixact_members_slru_buffers = 16; +int subtrans_slru_buffers = 32; +int notify_slru_buffers = 8; +int serial_slru_buffers = 16; +int clog_slru_buffers = 0; +int commit_ts_slru_buffers = 0; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0c5dc4d3e8..b65a4ae9ce 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2305,6 +2305,83 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + 
{"multixact_offsets_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for MultiXact offsets SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &multixact_offsets_slru_buffers, + 8, 2, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"multixact_members_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for MultiXact members SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &multixact_members_slru_buffers, + 16, 2, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"subtrans_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for subtransactions SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &subtrans_slru_buffers, + 32, 2, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"notify_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for asynchronous notifications SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &notify_slru_buffers, + 8, 2, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"serial_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for predicate locks SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &serial_slru_buffers, + 16, 2, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"clog_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for commit log SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &clog_slru_buffers, + 0, 0, INT_MAX / 2, + NULL, NULL, NULL + }, + + { + {"commit_ts_slru_buffers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the number of shared memory buffers used for commit timestamps SLRU."), + NULL, + GUC_UNIT_BLOCKS + }, + &commit_ts_slru_buffers, + 0, 0, INT_MAX / 2, + NULL, NULL, NULL + }, + { {"temp_buffers", PGC_USERSET, RESOURCES_MEM, gettext_noop("Sets the maximum number of temporary buffers used by each session."), diff --git 
a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b234a6bfe6..308fd565d3 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -190,6 +190,22 @@ # (change requires restart) #backend_flush_after = 0 # measured in pages, 0 disables +# - SLRU Buffers - + +#multixact_offsets_slru_buffers = 8 # memory used for MultiXact offsets + # (change requires restart) +#multixact_members_slru_buffers = 16 # memory used for MultiXact members + # (change requires restart) +#subtrans_slru_buffers = 32 # memory used for subtransactions + # (change requires restart) +#notify_slru_buffers = 8 # memory used for asynchronous notifications + # (change requires restart) +#serial_slru_buffers = 16 # memory used for predicate locks + # (change requires restart) +#clog_slru_buffers = 0 # memory used for CLOG + # (change requires restart) +#commit_ts_slru_buffers = 0 # memory used for commit timestamps + # (change requires restart) #------------------------------------------------------------------------------ # WRITE-AHEAD LOG diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4bbb035eae..97c0a46376 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -29,10 +29,6 @@ #define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) -/* Number of SLRU buffers to use for multixact */ -#define NUM_MULTIXACTOFFSET_BUFFERS 8 -#define NUM_MULTIXACTMEMBER_BUFFERS 16 - /* * Possible multixact lock modes ("status"). 
The first four modes are for * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index d0ab44ae82..ca0999056e 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -11,9 +11,6 @@ #ifndef SUBTRANS_H #define SUBTRANS_H -/* Number of SLRU buffers to use for subtrans */ -#define NUM_SUBTRANS_BUFFERS 32 - extern void SubTransSetParent(TransactionId xid, TransactionId parent); extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); diff --git a/src/include/commands/async.h b/src/include/commands/async.h index 9217f66b91..fa831e3721 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -15,11 +15,6 @@ #include <signal.h> -/* - * The number of SLRU page buffers we use for the notification queue. - */ -#define NUM_NOTIFY_BUFFERS 8 - extern bool Trace_notify; extern volatile sig_atomic_t notifyInterruptPending; diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 013850ac28..3d9f585fb9 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -162,6 +162,14 @@ extern PGDLLIMPORT int MaxBackends; extern PGDLLIMPORT int MaxConnections; extern PGDLLIMPORT int max_worker_processes; extern PGDLLIMPORT int max_parallel_workers; +extern PGDLLIMPORT int multixact_offsets_slru_buffers; +extern PGDLLIMPORT int multixact_members_slru_buffers; +extern PGDLLIMPORT int multixact_members_slru_buffers; +extern PGDLLIMPORT int subtrans_slru_buffers; +extern PGDLLIMPORT int notify_slru_buffers; +extern PGDLLIMPORT int serial_slru_buffers; +extern PGDLLIMPORT int clog_slru_buffers; +extern PGDLLIMPORT int commit_ts_slru_buffers; extern PGDLLIMPORT int MyProcPid; extern PGDLLIMPORT pg_time_t MyStartTime; diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h index 152b698611..c72779bd88 100644 --- 
a/src/include/storage/predicate.h +++ b/src/include/storage/predicate.h @@ -26,10 +26,6 @@ extern int max_predicate_locks_per_xact; extern int max_predicate_locks_per_relation; extern int max_predicate_locks_per_page; - -/* Number of SLRU buffers to use for Serial SLRU */ -#define NUM_SERIAL_BUFFERS 16 - /* * A handle used for sharing SERIALIZABLEXACT objects between the participants * in a parallel query. -- 2.30.1
From 65600b53939c34abf43e62f3f59be5671c43d301 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Thu, 25 Mar 2021 10:11:31 +1300 Subject: [PATCH v11 2/2] Add buffer mapping table for SLRUs. --- src/backend/access/transam/slru.c | 89 ++++++++++++++++++++++++++++--- src/include/access/slru.h | 2 + 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 82149ad782..487585bb60 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -58,6 +58,8 @@ #include "pgstat.h" #include "storage/fd.h" #include "storage/shmem.h" +#include "utils/dynahash.h" +#include "utils/hsearch.h" #define SlruFileName(ctl, path, seg) \ snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) @@ -79,6 +81,12 @@ typedef struct SlruWriteAllData typedef struct SlruWriteAllData *SlruWriteAll; +typedef struct SlruMappingTableEntry +{ + int pageno; + int slotno; +} SlruMappingTableEntry; + /* * Populate a file tag describing a segment file. 
We only use the segment * number, since we can derive everything else we need by having separate @@ -146,6 +154,9 @@ static int SlruSelectLRUPage(SlruCtl ctl, int pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data); static void SlruInternalDeleteSegment(SlruCtl ctl, int segno); +static void SlruMappingAdd(SlruCtl ctl, int pageno, int slotno); +static void SlruMappingRemove(SlruCtl ctl, int pageno); +static int SlruMappingFind(SlruCtl ctl, int pageno); /* * Initialization of shared memory @@ -168,7 +179,8 @@ SimpleLruShmemSize(int nslots, int nlsns) if (nlsns > 0) sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */ - return BUFFERALIGN(sz) + BLCKSZ * nslots; + return BUFFERALIGN(sz) + BLCKSZ * nslots + + hash_estimate_size(nslots, sizeof(SlruMappingTableEntry)); } /* @@ -187,6 +199,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id, SyncRequestHandler sync_handler) { + char mapping_table_name[SHMEM_INDEX_KEYSIZE]; + HASHCTL mapping_table_info; + HTAB *mapping_table; SlruShared shared; bool found; @@ -258,11 +273,21 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, else Assert(found); + /* Create or find the buffer mapping table. */ + memset(&mapping_table_info, 0, sizeof(mapping_table_info)); + mapping_table_info.keysize = sizeof(int); + mapping_table_info.entrysize = sizeof(SlruMappingTableEntry); + snprintf(mapping_table_name, sizeof(mapping_table_name), + "%s Mapping Table", name); + mapping_table = ShmemInitHash(mapping_table_name, nslots, nslots, + &mapping_table_info, HASH_ELEM | HASH_BLOBS); + /* * Initialize the unshared control struct, including directory path. We * assume caller set PagePrecedes. 
*/ ctl->shared = shared; + ctl->mapping_table = mapping_table; ctl->sync_handler = sync_handler; strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir)); } @@ -289,6 +314,9 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno) shared->page_number[slotno] == pageno); /* Mark the slot as containing this page */ + if (shared->page_status[slotno] != SLRU_PAGE_EMPTY) + SlruMappingRemove(ctl, shared->page_number[slotno]); + SlruMappingAdd(ctl, pageno, slotno); shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_VALID; shared->page_dirty[slotno] = true; @@ -362,7 +390,10 @@ SimpleLruWaitIO(SlruCtl ctl, int slotno) { /* indeed, the I/O must have failed */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS) + { + SlruMappingRemove(ctl, shared->page_number[slotno]); shared->page_status[slotno] = SLRU_PAGE_EMPTY; + } else /* write_in_progress */ { shared->page_status[slotno] = SLRU_PAGE_VALID; @@ -436,6 +467,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, !shared->page_dirty[slotno])); /* Mark the slot read-busy */ + if (shared->page_status[slotno] != SLRU_PAGE_EMPTY) + SlruMappingRemove(ctl, shared->page_number[slotno]); /* unmap the clean page being evicted */ + SlruMappingAdd(ctl, pageno, slotno); shared->page_number[slotno] = pageno; shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS; shared->page_dirty[slotno] = false; @@ -459,7 +493,13 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS && !shared->page_dirty[slotno]); - shared->page_status[slotno] = ok ? 
SLRU_PAGE_VALID : SLRU_PAGE_EMPTY; + if (ok) + shared->page_status[slotno] = SLRU_PAGE_VALID; + else + { + SlruMappingRemove(ctl, pageno); + shared->page_status[slotno] = SLRU_PAGE_EMPTY; + } LWLockRelease(&shared->buffer_locks[slotno].lock); @@ -1029,11 +1069,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) int best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer assigned */ - for (slotno = 0; slotno < shared->num_slots; slotno++) + slotno = SlruMappingFind(ctl, pageno); + if (slotno >= 0) { - if (shared->page_number[slotno] == pageno && - shared->page_status[slotno] != SLRU_PAGE_EMPTY) - return slotno; + Assert(shared->page_number[slotno] == pageno); + Assert(shared->page_status[slotno] != SLRU_PAGE_EMPTY); + return slotno; } /* @@ -1266,6 +1307,7 @@ restart:; if (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno]) { + SlruMappingRemove(ctl, shared->page_number[slotno]); shared->page_status[slotno] = SLRU_PAGE_EMPTY; continue; } @@ -1348,6 +1390,7 @@ restart: if (shared->page_status[slotno] == SLRU_PAGE_VALID && !shared->page_dirty[slotno]) { + SlruMappingRemove(ctl, shared->page_number[slotno]); shared->page_status[slotno] = SLRU_PAGE_EMPTY; continue; } @@ -1609,3 +1652,37 @@ SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path) errno = save_errno; return result; } + +static int +SlruMappingFind(SlruCtl ctl, int pageno) +{ + SlruMappingTableEntry *mapping; + + mapping = hash_search(ctl->mapping_table, &pageno, HASH_FIND, NULL); + if (mapping) + return mapping->slotno; + + return -1; +} + +static void +SlruMappingAdd(SlruCtl ctl, int pageno, int slotno) +{ + SlruMappingTableEntry *mapping; + bool found PG_USED_FOR_ASSERTS_ONLY; + + mapping = hash_search(ctl->mapping_table, &pageno, HASH_ENTER, &found); + mapping->slotno = slotno; + + Assert(!found); +} + +static void +SlruMappingRemove(SlruCtl ctl, int pageno) +{ + bool found PG_USED_FOR_ASSERTS_ONLY; + + hash_search(ctl->mapping_table, 
&pageno, HASH_REMOVE, &found); + + Assert(found); +} diff --git a/src/include/access/slru.h b/src/include/access/slru.h index dd52e8cec7..8aa3efc0ee 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -16,6 +16,7 @@ #include "access/xlogdefs.h" #include "storage/lwlock.h" #include "storage/sync.h" +#include "utils/hsearch.h" /* @@ -110,6 +111,7 @@ typedef SlruSharedData *SlruShared; typedef struct SlruCtlData { SlruShared shared; + HTAB *mapping_table; /* * Which sync handler function to use when handing sync requests over to -- 2.30.1