On Sat, Apr 11, 2020 at 4:10 PM Thomas Munro <thomas.mu...@gmail.com> wrote: > I received a report off-list from someone who experimented with the > patch I shared earlier on this thread[1], using a crash recovery test > similar to one I showed on the WAL prefetching thread[2] (which he was > also testing, separately).
Rebased. I'll add this to the open commitfest.
From 03662f6a546329b58d41897d3b8096d5794eacdc Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Tue, 31 Dec 2019 13:22:49 +1300 Subject: [PATCH v2] Cache smgrnblocks() results in recovery. Avoid repeatedly calling lseek(SEEK_END) during recovery by caching the size of each fork. For now, we can't use the same technique in other processes (for example: query planning and seq scans are two sources of high frequency lseek(SEEK_END) calls), because we lack a shared invalidation mechanism. Do this by generalizing the pre-existing caching used by FSM and VM to support all forks. Discussion: https://postgr.es/m/CAEepm%3D3SSw-Ty1DFcK%3D1rU-K6GSzYzfdD4d%2BZwapdN7dTa6%3DnQ%40mail.gmail.com --- contrib/pg_visibility/pg_visibility.c | 2 +- src/backend/access/heap/visibilitymap.c | 18 ++++----- src/backend/catalog/storage.c | 4 +- src/backend/storage/freespace/freespace.c | 27 +++++++------ src/backend/storage/smgr/smgr.c | 49 +++++++++++++++++------ src/include/storage/smgr.h | 12 +++--- 6 files changed, 66 insertions(+), 46 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 68d580ed1e..e731161734 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -392,7 +392,7 @@ pg_truncate_visibility_map(PG_FUNCTION_ARGS) check_relation_relkind(rel); RelationOpenSmgr(rel); - rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; + rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber; block = visibilitymap_prepare_truncate(rel, 0); if (BlockNumberIsValid(block)) diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 0a51678c40..b1072183bc 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -561,17 +561,16 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend) * If we haven't cached the size of the visibility map fork yet, check it * first. */ - if (rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) + if (rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber) { if (smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM)) - rel->rd_smgr->smgr_vm_nblocks = smgrnblocks(rel->rd_smgr, - VISIBILITYMAP_FORKNUM); + smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM); else - rel->rd_smgr->smgr_vm_nblocks = 0; + rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = 0; } /* Handle requests beyond EOF */ - if (blkno >= rel->rd_smgr->smgr_vm_nblocks) + if (blkno >= rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM]) { if (extend) vm_extend(rel, blkno + 1); @@ -641,11 +640,13 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) * Create the file first if it doesn't exist. If smgr_vm_nblocks is * positive then it must exist, no need for an smgrexists call. */ - if ((rel->rd_smgr->smgr_vm_nblocks == 0 || - rel->rd_smgr->smgr_vm_nblocks == InvalidBlockNumber) && + if ((rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == 0 || + rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber) && !smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM)) smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false); + /* Invalidate cache so that smgrnblocks() asks the kernel. */ + rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber; vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM); /* Now extend the file */ @@ -667,8 +668,5 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) */ CacheInvalidateSmgr(rel->rd_smgr->smgr_rnode); - /* Update local cache with the up-to-date size */ - rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index ec143b640a..9e6e6c42d3 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -290,8 +290,8 @@ RelationTruncate(Relation rel, BlockNumber nblocks) * Make sure smgr_targblock etc aren't pointing somewhere past new end */ rel->rd_smgr->smgr_targblock = InvalidBlockNumber; - rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber; - rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; + for (int i = 0; i <= MAX_FORKNUM; ++i) + rel->rd_smgr->smgr_cached_nblocks[i] = InvalidBlockNumber; /* Prepare for truncation of MAIN fork of the relation */ forks[nforks] = MAIN_FORKNUM; diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 95a21f6cc3..6a96126b0c 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -541,18 +541,19 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend) * value might be stale. (We send smgr inval messages on truncation, but * not on extension.) */ - if (rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber || - blkno >= rel->rd_smgr->smgr_fsm_nblocks) + if (rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber || + blkno >= rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM]) { + /* Invalidate the cache so smgrnblocks asks the kernel. */ + rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber; if (smgrexists(rel->rd_smgr, FSM_FORKNUM)) - rel->rd_smgr->smgr_fsm_nblocks = smgrnblocks(rel->rd_smgr, - FSM_FORKNUM); + smgrnblocks(rel->rd_smgr, FSM_FORKNUM); else - rel->rd_smgr->smgr_fsm_nblocks = 0; + rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = 0; } /* Handle requests beyond EOF */ - if (blkno >= rel->rd_smgr->smgr_fsm_nblocks) + if (blkno >= rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM]) { if (extend) fsm_extend(rel, blkno + 1); @@ -621,14 +622,17 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) RelationOpenSmgr(rel); /* - * Create the FSM file first if it doesn't exist. If smgr_fsm_nblocks is - * positive then it must exist, no need for an smgrexists call. + * Create the FSM file first if it doesn't exist. If + * smgr_cached_nblocks[FSM_FORKNUM] is positive then it must exist, no + * need for an smgrexists call. */ - if ((rel->rd_smgr->smgr_fsm_nblocks == 0 || - rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber) && + if ((rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] == 0 || + rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber) && !smgrexists(rel->rd_smgr, FSM_FORKNUM)) smgrcreate(rel->rd_smgr, FSM_FORKNUM, false); + /* Invalidate cache so that smgrnblocks() asks the kernel. */ + rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber; fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM); while (fsm_nblocks_now < fsm_nblocks) @@ -640,9 +644,6 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) fsm_nblocks_now++; } - /* Update local cache with the up-to-date size */ - rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7d667c6586..d60cc3ab61 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -17,6 +17,7 @@ */ #include "postgres.h" +#include "access/xlog.h" #include "lib/ilist.h" #include "storage/bufmgr.h" #include "storage/ipc.h" @@ -174,8 +175,8 @@ smgropen(RelFileNode rnode, BackendId backend) /* hash_search already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_targblock = InvalidBlockNumber; - reln->smgr_fsm_nblocks = InvalidBlockNumber; - reln->smgr_vm_nblocks = InvalidBlockNumber; + for (int i = 0; i <= MAX_FORKNUM; ++i) + reln->smgr_cached_nblocks[i] = InvalidBlockNumber; reln->smgr_which = 0; /* we only have md.c at present */ /* implementation-specific initialization */ @@ -464,6 +465,16 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, { smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, buffer, skipFsync); + + /* + * Normally we expect this to increase nblocks by one, but if the cached + * value isn't as expected, just invalidate it so the next call asks the + * kernel. + */ + if (reln->smgr_cached_nblocks[forknum] == blocknum) + reln->smgr_cached_nblocks[forknum] = blocknum + 1; + else + reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; } /* @@ -537,7 +548,20 @@ smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum) { - return smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + BlockNumber result; + + /* + * For now, we only use cached values in recovery due to lack of shared + * invalidation for extensions. + */ + if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber) + return reln->smgr_cached_nblocks[forknum]; + + result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + + reln->smgr_cached_nblocks[forknum] = result; + + return result; } /* @@ -576,20 +600,19 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb /* Do the truncation */ for (i = 0; i < nforks; i++) { + /* Make the cached size is invalid if we encounter an error. */ + reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber; + smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]); /* - * We might as well update the local smgr_fsm_nblocks and - * smgr_vm_nblocks settings. The smgr cache inval message that this - * function sent will cause other backends to invalidate their copies - * of smgr_fsm_nblocks and smgr_vm_nblocks, and these ones too at the - * next command boundary. But these ensure they aren't outright wrong - * until then. + * We might as well update the local smgr_cached_nblocks values. The + * smgr cache inval message that this function sent will cause other + * backends to invalidate their copies of smgr_fsm_nblocks and + * smgr_vm_nblocks, and these ones too at the next command boundary. + * But these ensure they aren't outright wrong until then. */ - if (forknum[i] == FSM_FORKNUM) - reln->smgr_fsm_nblocks = nblocks[i]; - if (forknum[i] == VISIBILITYMAP_FORKNUM) - reln->smgr_vm_nblocks = nblocks[i]; + reln->smgr_cached_nblocks[forknum[i]] = nblocks[i]; } } diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 6566659593..f28a842401 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -45,15 +45,13 @@ typedef struct SMgrRelationData struct SMgrRelationData **smgr_owner; /* - * These next three fields are not actually used or manipulated by smgr, - * except that they are reset to InvalidBlockNumber upon a cache flush - * event (in particular, upon truncation of the relation). Higher levels - * store cached state here so that it will be reset when truncation - * happens. In all three cases, InvalidBlockNumber means "unknown". + * The following fields are reset to InvalidBlockNumber upon a cache flush + * event, and hold the last known size for each fork. This information is + * currently only reliable during recovery, since there is no cache + * invalidation for fork extension. */ BlockNumber smgr_targblock; /* current insertion target block */ - BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */ - BlockNumber smgr_vm_nblocks; /* last known size of vm fork */ + BlockNumber smgr_cached_nblocks[MAX_FORKNUM + 1]; /* last known size */ /* additional public fields may someday exist here */ -- 2.20.1