In short, I propose the attached. -- Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
>From b4ba8135f8044e0077a27fcf6ad18451380cbcb3 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera <alvhe...@alvh.no-ip.org> Date: Wed, 31 Jan 2024 12:27:51 +0100 Subject: [PATCH v2] Use atomics for SlruSharedData->latest_page_number
--- src/backend/access/transam/clog.c | 6 +--- src/backend/access/transam/commit_ts.c | 7 ++--- src/backend/access/transam/multixact.c | 28 +++++++++++------- src/backend/access/transam/slru.c | 40 +++++++++++++++++++------- src/include/access/slru.h | 5 +++- 5 files changed, 54 insertions(+), 32 deletions(-) diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index f6e7da7ffc..f8aa91eb0a 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -766,14 +766,10 @@ StartupCLOG(void) TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid); int64 pageno = TransactionIdToPage(xid); - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); - /* * Initialize our idea of the latest page number. */ - XactCtl->shared->latest_page_number = pageno; - - LWLockRelease(XactSLRULock); + pg_atomic_write_u64(XactCtl->shared->latest_page_number, pageno); } /* diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 61b82385f3..6bfe60343e 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -689,9 +689,7 @@ ActivateCommitTs(void) /* * Re-Initialize our idea of the latest page number. */ - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); - CommitTsCtl->shared->latest_page_number = pageno; - LWLockRelease(CommitTsSLRULock); + pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number, pageno); /* * If CommitTs is enabled, but it wasn't in the previous server run, we @@ -1006,7 +1004,8 @@ commit_ts_redo(XLogReaderState *record) * During XLOG replay, latest_page_number isn't set up yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ - CommitTsCtl->shared->latest_page_number = trunc->pageno; + pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number, + trunc->pageno); SimpleLruTruncate(CommitTsCtl, trunc->pageno); } diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 59523be901..febc429f72 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -2017,13 +2017,15 @@ StartupMultiXact(void) * Initialize offset's idea of the latest page number. */ pageno = MultiXactIdToOffsetPage(multi); - MultiXactOffsetCtl->shared->latest_page_number = pageno; + pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number, + pageno); /* * Initialize member's idea of the latest page number. */ pageno = MXOffsetToMemberPage(offset); - MultiXactMemberCtl->shared->latest_page_number = pageno; + pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number, + pageno); } /* @@ -2047,14 +2049,15 @@ TrimMultiXact(void) oldestMXactDB = MultiXactState->oldestMultiXactDB; LWLockRelease(MultiXactGenLock); - /* Clean up offsets state */ - LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); - /* * (Re-)Initialize our idea of the latest page number for offsets. */ pageno = MultiXactIdToOffsetPage(nextMXact); - MultiXactOffsetCtl->shared->latest_page_number = pageno; + pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number, + pageno); + + /* Clean up offsets state */ + LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); /* * Zero out the remainder of the current offsets page. See notes in @@ -2081,14 +2084,16 @@ TrimMultiXact(void) LWLockRelease(MultiXactOffsetSLRULock); - /* And the same for members */ - LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); - /* + * And the same for members. + * * (Re-)Initialize our idea of the latest page number for members. */ pageno = MXOffsetToMemberPage(offset); - MultiXactMemberCtl->shared->latest_page_number = pageno; + pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number, + pageno); + + LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); /* * Zero out the remainder of the current members page. See notes in @@ -3333,7 +3338,8 @@ multixact_redo(XLogReaderState *record) * SimpleLruTruncate. */ pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff); - MultiXactOffsetCtl->shared->latest_page_number = pageno; + pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number, + pageno); PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff); LWLockRelease(MultiXactTruncationLock); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 9ac4790f16..c1d0dfc73b 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -17,7 +17,8 @@ * per-buffer LWLocks that synchronize I/O for each buffer. The control lock * must be held to examine or modify any shared state. A process that is * reading in or writing out a page buffer does not hold the control lock, - * only the per-buffer lock for the buffer it is working on. + * only the per-buffer lock for the buffer it is working on. One exception + * is latest_page_number, which is read and written using atomic ops. * * "Holding the control lock" means exclusive lock in all cases except for * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for @@ -239,8 +240,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, shared->lsn_groups_per_page = nlsns; shared->cur_lru_count = 0; - - /* shared->latest_page_number will be set later */ + pg_atomic_write_u64(&shared->latest_page_number, 0); shared->slru_stats_idx = pgstat_get_slru_index(name); @@ -329,8 +329,16 @@ SimpleLruZeroPage(SlruCtl ctl, int64 pageno) /* Set the LSNs for this new page to zero */ SimpleLruZeroLSNs(ctl, slotno); - /* Assume this page is now the latest active page */ - shared->latest_page_number = pageno; + /* + * Assume this page is now the latest active page. + * + * Note that because both this routine and SlruSelectLRUPage run with + * a bank lock held, it is not possible for this to be zeroing a page + * that SlruSelectLRUPage is going to evict simultaneously -- they would + * both have to hold the same bank lock! Therefore, there's no memory + * barrier here. + */ + pg_atomic_write_u64(&shared->latest_page_number, pageno); /* update the stats counter of zeroed pages */ pgstat_count_slru_page_zeroed(shared->slru_stats_idx); @@ -1113,9 +1121,17 @@ SlruSelectLRUPage(SlruCtl ctl, int64 pageno) shared->page_lru_count[slotno] = cur_count; this_delta = 0; } + + /* + * If this page is the one most recently zeroed, don't consider it + * an eviction candidate. See comments in SimpleLruZeroPage for an + * explanation about the lack of a memory barrier here. + */ this_page_number = shared->page_number[slotno]; - if (this_page_number == shared->latest_page_number) + if (this_page_number == + pg_atomic_read_u64(&shared->latest_page_number)) continue; + if (shared->page_status[slotno] == SLRU_PAGE_VALID) { if (this_delta > best_valid_delta || @@ -1254,7 +1270,6 @@ void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage) { SlruShared shared = ctl->shared; - int slotno; /* update the stats counter of truncates */ pgstat_count_slru_truncate(shared->slru_stats_idx); @@ -1270,10 +1285,13 @@ SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage) restart: /* - * While we are holding the lock, make an important safety check: the - * current endpoint page must not be eligible for removal. + * An important safety check: the current endpoint page must not be + * eligible for removal. Like SlruSelectLRUPage, we don't need a + * memory barrier here because for the affected page to be relevant, + * we'd have to have the same bank lock as SimpleLruZeroPage. */ - if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) + if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number), + cutoffPage)) { LWLockRelease(shared->ControlLock); ereport(LOG, @@ -1282,7 +1300,7 @@ restart: return; } - for (slotno = 0; slotno < shared->num_slots; slotno++) + for (int slotno = 0; slotno < shared->num_slots; slotno++) { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) continue; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index b05f6bc71d..2109488654 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -49,6 +49,9 @@ typedef enum /* * Shared-memory state + * + * ControlLock is used to protect access to the other fields, except + * latest_page_number, which uses atomics; see comment in slru.c. */ typedef struct SlruSharedData { @@ -95,7 +98,7 @@ typedef struct SlruSharedData * this is not critical data, since we use it only to avoid swapping out * the latest page. */ - int64 latest_page_number; + pg_atomic_uint64 latest_page_number; /* SLRU's index for statistics purposes (might not be unique) */ int slru_stats_idx; -- 2.39.2