On Mon, Mar 14, 2016 at 6:46 PM, David Steele <da...@pgmasters.net> wrote: > On 2/24/16 12:40 AM, Michael Paquier wrote: > >> This has the merit to be clear, thanks for the input. Whatever the >> approach taken at the end we have two candidates: >> - Extend XLogInsert() with an extra argument for flags (Andres) >> - Introduce XLogInsertExtended with this extra argument and let >> XLogInsert() in peace (Robert and I). >> Actually, I lied, there was still something I could do for this >> thread: attached are two patches implementing both approaches as >> respectively a-1 and a-2. Patch b is the set of logs I used for the >> tests to show with a low checkpoint_timeout that checkpoints are >> getting correctly skipped on an idle system. > > > Unfortunately neither A nor B apply anymore. > > However, since the patches can still be read through I wonder if Robert or > Andres would care to opine on whether A or B is better now that they can see > the full implementation? > > For my 2c I'm happy with XLogInsertExtended() since it seems to be a rare > use case where flags are required. This can always be refactored in the > future if/when the use of flags spreads. > > I think it would be good to make a decision on this before asking Michael to > rebase.
That's a bit embarrassing — the last versions should have applied cleanly, as there have not been any changes in this area of the code lately... But... I made a mistake when generating the patches by diff'ing them against an incorrect commit number... This explains why they exploded in size, so attached are the corrected, rebased versions. Too many patches, I guess... And both of them are attached, by the way. -- Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d7973bc..a2b4aff 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8343,8 +8343,12 @@ CreateCheckPoint(int flags) if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { + elog(LOG, "Not a forced or shutdown checkpoint: progress_lsn %X/%X, ckpt %X/%X", + (uint32) (progress_lsn >> 32), (uint32) progress_lsn, + (uint32) (ControlFile->checkPoint >> 32), (uint32) ControlFile->checkPoint); if (progress_lsn == ControlFile->checkPoint) { + elog(LOG, "Checkpoint is skipped"); WALInsertLockRelease(); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); @@ -8511,7 +8515,11 @@ CreateCheckPoint(int flags) * recovery we don't need to write running xact data. */ if (!shutdown && XLogStandbyInfoActive()) - LogStandbySnapshot(); + { + XLogRecPtr lsn = LogStandbySnapshot(); + elog(LOG, "snapshot taken by checkpoint %X/%X", + (uint32) (lsn >> 32), (uint32) lsn); + } START_CRIT_SECTION(); diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 79cfd7b..082e589 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -333,7 +333,9 @@ BackgroundWriterMain(void) GetLastCheckpointRecPtr() < current_progress_lsn && last_progress_lsn < current_progress_lsn) { - (void) LogStandbySnapshot(); + XLogRecPtr lsn = LogStandbySnapshot(); + elog(LOG, "snapshot taken by bgwriter %X/%X", + (uint32) (lsn >> 32), (uint32) lsn); last_snapshot_ts = now; last_progress_lsn = current_progress_lsn; }
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index c740952..22ebba5 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -634,7 +634,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx); XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX); + recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, 0); page = BufferGetPage(meta); PageSetLSN(page, recptr); diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index d0ca485..98fcc2c 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -199,7 +199,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) newtup, newsz); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(oldpage, recptr); } @@ -294,7 +294,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange, /* old page */ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(oldpage, recptr); PageSetLSN(newpage, recptr); @@ -444,7 +444,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange, XLogRegisterBuffer(1, revmapbuf, 0); - recptr = XLogInsert(RM_BRIN_ID, info); + recptr = XLogInsert(RM_BRIN_ID, info, 0); PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(revmapbuf), recptr); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index b2c273d..d30345f 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -487,7 +487,7 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND); + recptr = 
XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, 0); PageSetLSN(metapage, recptr); PageSetLSN(page, recptr); } diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 06ba9cb..db2f959 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -415,7 +415,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, sizeof(BlockIdData) * 2); } - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, 0); PageSetLSN(page, recptr); if (childbuf != InvalidBuffer) PageSetLSN(childpage, recptr); @@ -576,7 +576,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, XLogRegisterData((char *) &data, sizeof(ginxlogSplit)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, 0); PageSetLSN(BufferGetPage(stack->buffer), recptr); PageSetLSN(BufferGetPage(rbuffer), recptr); if (stack->parent == NULL) diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index a55bb4a..5799c0a 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -829,7 +829,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs) { XLogRecPtr recptr; - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, 0); PageSetLSN(page, recptr); } @@ -1748,7 +1748,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, rootsize); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 2ddf568..7211dee 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -124,7 +124,7 @@ writeListPage(Relation index, Buffer buffer, 
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); XLogRegisterBufData(0, workspace, size); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, 0); PageSetLSN(page, recptr); } @@ -406,7 +406,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT); XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, 0); PageSetLSN(metapage, recptr); if (buffer != InvalidBuffer) @@ -601,7 +601,7 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, XLogRegisterData((char *) &data, sizeof(ginxlogDeleteListPages)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, 0); PageSetLSN(metapage, recptr); for (i = 0; i < data.ndeleted; i++) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index cd21e0e..b8268da 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -350,7 +350,7 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT); XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, 0); page = BufferGetPage(RootBuffer); PageSetLSN(page, recptr); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 9450267..c9f61af 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -658,7 +658,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats) XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta)); XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE); + recptr = 
XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, 0); PageSetLSN(metapage, recptr); } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 6a4b98a..35f8f17 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -104,7 +104,7 @@ xlogVacuumPage(Relation index, Buffer buffer) XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, 0); PageSetLSN(page, recptr); } @@ -265,7 +265,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE); + recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, 0); PageSetLSN(page, recptr); PageSetLSN(parentPage, recptr); PageSetLSN(BufferGetPage(lBuffer), recptr); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 4e43a69..6b41bdc 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -184,7 +184,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogBeginInsert(); XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT); - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, 0); PageSetLSN(page, recptr); } else diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index b48e97c..d066677 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -370,7 +370,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf, i++; } - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, 0); return recptr; } @@ -416,7 +416,7 @@ gistXLogUpdate(RelFileNode node, Buffer buffer, if (BufferIsValid(leftchildbuf)) 
XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD); - recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, 0); return recptr; } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 34ba385..fe82073 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2510,7 +2510,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, info); + recptr = XLogInsert(RM_HEAP_ID, info, 0); PageSetLSN(page, recptr); } @@ -2849,7 +2849,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP2_ID, info); + recptr = XLogInsert(RM_HEAP2_ID, info, 0); PageSetLSN(page, recptr); } @@ -3311,7 +3311,7 @@ l1: /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, 0); PageSetLSN(page, recptr); } @@ -5064,7 +5064,7 @@ failed: /* we don't decode row locks atm, so no need to log the origin */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, 0); PageSetLSN(page, recptr); } @@ -5706,7 +5706,7 @@ l4: XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, 0); PageSetLSN(page, recptr); } @@ -5846,7 +5846,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple) XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM, 0); PageSetLSN(page, recptr); } @@ -5980,7 
+5980,7 @@ heap_abort_speculative(Relation relation, HeapTuple tuple) /* No replica identity & replication origin logged */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, 0); PageSetLSN(page, recptr); } @@ -6084,7 +6084,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple) /* inplace updates aren't decoded atm, don't log the origin */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE); + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, 0); PageSetLSN(page, recptr); } @@ -7143,7 +7143,7 @@ log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, 0); return recptr; } @@ -7202,7 +7202,7 @@ log_heap_clean(Relation reln, Buffer buffer, XLogRegisterBufData(0, (char *) nowunused, nunused * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN, 0); return recptr; } @@ -7238,7 +7238,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, XLogRegisterBufData(0, (char *) tuples, ntuples * sizeof(xl_heap_freeze_tuple)); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, 0); return recptr; } @@ -7275,7 +7275,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, flags |= REGBUF_NO_IMAGE; XLogRegisterBuffer(1, heap_buffer, flags); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, 0); return recptr; } @@ -7495,7 +7495,7 @@ log_heap_update(Relation reln, Buffer oldbuf, /* filtering by origin on a row level is much more efficient */ XLogIncludeOrigin(); - recptr = XLogInsert(RM_HEAP_ID, info); + recptr = XLogInsert(RM_HEAP_ID, info, 0); return recptr; } @@ 
-7570,7 +7570,7 @@ log_heap_new_cid(Relation relation, HeapTuple tup) /* will be looked at irrespective of origin */ - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID); + recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, 0); return recptr; } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index f9ce986..df94448 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -931,7 +931,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state) XLogRegisterData(waldata_start, len); /* write xlog record */ - XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE); + XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, 0); pfree(waldata_start); } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index e3c55eb..a2b10d7 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -906,7 +906,7 @@ _bt_insertonpg(Relation rel, else XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup)); - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); if (BufferIsValid(metabuf)) { @@ -1325,7 +1325,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright, else xlinfo = newitemonleft ? 
XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R; - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); PageSetLSN(origpage, recptr); PageSetLSN(rightpage, recptr); @@ -2045,7 +2045,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->pd_upper); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, 0); PageSetLSN(lpage, recptr); PageSetLSN(rootpage, recptr); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 67755d7..9453b0f 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -254,7 +254,7 @@ _bt_getroot(Relation rel, int access) XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, 0); PageSetLSN(rootpage, recptr); PageSetLSN(metapg, recptr); @@ -551,7 +551,7 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX XLogBeginInsert(); XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage); - XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE); + XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, 0); } /* @@ -837,7 +837,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, if (nitems > 0) XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, 0); PageSetLSN(page, recptr); } @@ -911,7 +911,7 @@ _bt_delitems_delete(Relation rel, Buffer buf, */ XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber)); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, 0); PageSetLSN(page, recptr); } @@ -1476,7 +1476,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) XLogRegisterData((char *) &xlrec, 
SizeOfBtreeMarkPageHalfDead); - recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD); + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, 0); page = BufferGetPage(topparent); PageSetLSN(page, recptr); @@ -1820,7 +1820,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) else xlinfo = XLOG_BTREE_UNLINK_PAGE; - recptr = XLogInsert(RM_BTREE_ID, xlinfo); + recptr = XLogInsert(RM_BTREE_ID, xlinfo, 0); if (BufferIsValid(metabuf)) { diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index f090ca5..4d41db7 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -304,7 +304,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple, if (xlrec.offnumParent != InvalidOffsetNumber) XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, 0); PageSetLSN(current->page, recptr); @@ -541,7 +541,7 @@ moveLeafs(Relation index, SpGistState *state, XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? 
REGBUF_WILL_INIT : 0)); XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, 0); PageSetLSN(current->page, recptr); PageSetLSN(npage, recptr); @@ -1386,7 +1386,7 @@ doPickSplit(Relation index, SpGistState *state, } /* Issue the WAL record */ - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, 0); /* Update page LSNs on all affected pages */ if (newLeafBuffer != InvalidBuffer) @@ -1540,7 +1540,7 @@ spgAddNodeAction(Relation index, SpGistState *state, XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, 0); PageSetLSN(current->page, recptr); } @@ -1664,7 +1664,7 @@ spgAddNodeAction(Relation index, SpGistState *state, XLogRegisterData((char *) &xlrec, sizeof(xlrec)); XLogRegisterData((char *) newInnerTuple, newInnerTuple->size); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, 0); /* we don't bother to check if any of these are redundant */ PageSetLSN(current->page, recptr); @@ -1834,7 +1834,7 @@ spgSplitNodeAction(Relation index, SpGistState *state, XLogRegisterBuffer(1, newBuffer, flags); } - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, 0); PageSetLSN(current->page, recptr); diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 44fd644..507ae74 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -113,7 +113,7 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD); - recptr = 
XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, 0); PageSetLSN(BufferGetPage(metabuffer), recptr); PageSetLSN(BufferGetPage(rootbuffer), recptr); diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 15b867f..5036219 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -389,7 +389,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, 0); PageSetLSN(page, recptr); } @@ -470,7 +470,7 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer) XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, 0); PageSetLSN(page, recptr); } @@ -591,7 +591,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer) XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); - recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT); + recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 06aff18..15a5e53 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -701,7 +701,7 @@ WriteZeroPageXlogRec(int pageno) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); + (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, 0); } /* @@ -717,7 +717,7 @@ WriteTruncateXlogRec(int pageno) XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE); + recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, 0); XLogFlush(recptr); } diff --git 
a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 1713439..adc42fc 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -893,7 +893,7 @@ WriteZeroPageXlogRec(int pageno) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); + (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, 0); } /* @@ -904,7 +904,7 @@ WriteTruncateXlogRec(int pageno) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE); + (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE, 0); } /* @@ -926,7 +926,7 @@ WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, offsetof(xl_commit_ts_set, mainxid) + sizeof(TransactionId)); XLogRegisterData((char *) subxids, nsubxids * sizeof(TransactionId)); - XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_SETTS); + XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_SETTS, 0); } /* diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index a677af0..9a1ad41 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -814,7 +814,7 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members) XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate); XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember)); - (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID); + (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, 0); /* Now enter the information into the OFFSETs and MEMBERs logs */ RecordNewMultiXact(multi, offset, nmembers, members); @@ -3177,7 +3177,7 @@ WriteMZeroPageXlogRec(int pageno, uint8 info) { XLogBeginInsert(); XLogRegisterData((char *) (&pageno), sizeof(int)); - (void) XLogInsert(RM_MULTIXACT_ID, info); + (void) XLogInsert(RM_MULTIXACT_ID, info, 0); } /* @@ -3204,7 +3204,7 @@ WriteMTruncateXlogRec(Oid 
oldestMultiDB, XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), SizeOfMultiXactTruncate); - recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID); + recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID, 0); XLogFlush(recptr); } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index c4fd9ef..1eba72d 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1063,7 +1063,7 @@ EndPrepare(GlobalTransaction gxact) XLogBeginInsert(); for (record = records.head; record != NULL; record = record->next) XLogRegisterData(record->data, record->len); - gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE); + gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, 0); XLogFlush(gxact->prepare_end_lsn); /* If we crash now, we have prepared: WAL replay will fix things */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 8a2cd45..ee3925f 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -630,7 +630,7 @@ AssignTransactionId(TransactionState s) XLogRegisterData((char *) unreportedXids, nUnreportedXids * sizeof(TransactionId)); - (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT); + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, 0); nUnreportedXids = 0; /* mark top, not current xact as having been logged */ @@ -5213,7 +5213,7 @@ XactLogCommitRecord(TimestampTz commit_time, /* we allow filtering by xacts */ XLogIncludeOrigin(); - return XLogInsert(RM_XACT_ID, info); + return XLogInsert(RM_XACT_ID, info, 0); } /* @@ -5300,7 +5300,7 @@ XactLogAbortRecord(TimestampTz abort_time, if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); - return XLogInsert(RM_XACT_ID, info); + return XLogInsert(RM_XACT_ID, info, 0); } /* diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 
5b1c361..d7973bc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -436,11 +436,30 @@ typedef struct XLogwrtResult * the WAL record is just copied to the page and the lock is released. But * to avoid the deadlock-scenario explained above, the indicator is always * updated before sleeping while holding an insertion lock. + * + * The progressAt values indicate the insertion progress used to determine + * WAL insertion activity since a previous checkpoint, which is aimed at + * finding out if a checkpoint should be skipped or not or if standby + * activity should be logged. Progress position is basically updated + * for all types of records, for the time being only snapshot logging + * is out of this scope to properly skip their logging on idle systems. + * Tracking the WAL activity directly in WALInsertLock has the advantage + * to not rely on taking an exclusive lock on all the WAL insertion locks, + * hence reducing the impact of the activity lookup. This takes also + * advantage to avoid 8-byte torn reads on some platforms by using the + * fact that each insert lock is located on the same cache line. + * XXX: There is still room for more improvements here, particularly + * WAL operations related to unlogged relations (INIT_FORKNUM) should not + * update the progress LSN as those relations are reset during crash + * recovery so enforcing buffers of such relations to be flushed for + * example in the case of a load only on unlogged relations is a waste + * of disk write. */ typedef struct { LWLock lock; XLogRecPtr insertingAt; + XLogRecPtr progressAt; } WALInsertLock; /* @@ -878,6 +897,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * which pages need a full-page image, and retry. If fpw_lsn is invalid, the * record is always inserted. * + * 'flags' gives more in-depth control on the record being inserted. As of + * now, this controls if the progress LSN positions are updated. 
+ * * The first XLogRecData in the chain must be for the record header, and its * data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and * xl_crc fields in the header, the rest of the header must already be filled @@ -890,7 +912,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * WAL rule "write the log before the data".) */ XLogRecPtr -XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) +XLogInsertRecord(XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags) { XLogCtlInsert *Insert = &XLogCtl->Insert; pg_crc32c rdata_crc; @@ -989,6 +1013,25 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) inserted = true; } + /* + * Update the progress LSN positions. At least one WAL insertion lock + * is already taken appropriately before doing that, and it is just more + * simple to do that here where WAL record data and type is at hand. + * The progress is set at the start position of the record tracked that + * is being added, making easier checkpoint progress tracking as the + * control file already saves the start LSN position of the last + * checkpoint run. If an exclusive lock is taken for WAL insertion, + * there is actually no need to update all the progression fields, so + * just do it on the first one. + */ + if ((flags & XLOG_INSERT_NO_PROGRESS) == 0) + { + if (holdingAllLocks) + WALInsertLocks[0].l.progressAt = StartPos; + else + WALInsertLocks[MyLockNo].l.progressAt = StartPos; + } + if (inserted) { /* @@ -4714,6 +4757,7 @@ XLOGShmemInit(void) { LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; + WALInsertLocks[i].l.progressAt = InvalidXLogRecPtr; } /* @@ -7887,6 +7931,55 @@ GetFlushRecPtr(void) } /* + * GetProgressRecPtr -- Returns the newest WAL activity position, aimed + * at the last significant WAL activity, or in other words any activity + * not referring to standby logging as of now. 
Finding the last activity + * position is done by scanning each WAL insertion lock by taking directly + * the light-weight lock associated to it. + */ +XLogRecPtr +GetProgressRecPtr(void) +{ + XLogRecPtr res = InvalidXLogRecPtr; + int i; + + /* + * Look at the latest LSN position referring to the activity done by + * WAL insertion. An exclusive lock is taken because currently the + * locking logic for WAL insertion only expects such a level of locking. + * Taking a lock is as well necessary to prevent potential torn reads + * on some platforms. + */ + for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++) + { + XLogRecPtr progress_lsn; + + LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE); + progress_lsn = WALInsertLocks[i].l.progressAt; + LWLockRelease(&WALInsertLocks[i].l.lock); + + if (res < progress_lsn) + res = progress_lsn; + } + + return res; +} + +/* + * GetLastCheckpointRecPtr -- Returns the last checkpoint insert position. + */ +XLogRecPtr +GetLastCheckpointRecPtr(void) +{ + XLogRecPtr ckpt_lsn; + + LWLockAcquire(ControlFileLock, LW_SHARED); + ckpt_lsn = ControlFile->checkPoint; + LWLockRelease(ControlFileLock); + return ckpt_lsn; +} + +/* * Get the time of the last xlog segment switch */ pg_time_t @@ -8146,7 +8239,7 @@ CreateCheckPoint(int flags) uint32 freespace; XLogRecPtr PriorRedoPtr; XLogRecPtr curInsert; - XLogRecPtr prevPtr; + XLogRecPtr progress_lsn; VirtualTransactionId *vxids; int nvxids; @@ -8227,34 +8320,30 @@ CreateCheckPoint(int flags) checkPoint.oldestActiveXid = InvalidTransactionId; /* + * Get progress before acquiring insert locks to shorten the locked + * section waiting ahead. + */ + progress_lsn = GetProgressRecPtr(); + + /* * We must block concurrent insertions while examining insert state to * determine the checkpoint REDO pointer. 
*/ WALInsertLockAcquireExclusive(); curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); - prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos); /* - * If this isn't a shutdown or forced checkpoint, and we have not inserted - * any XLOG records since the start of the last checkpoint, skip the - * checkpoint. The idea here is to avoid inserting duplicate checkpoints - * when the system is idle. That wastes log space, and more importantly it - * exposes us to possible loss of both current and previous checkpoint - * records if the machine crashes just as we're writing the update. - * (Perhaps it'd make even more sense to checkpoint only when the previous - * checkpoint record is in a different xlog page?) - * - * If the previous checkpoint crossed a WAL segment, however, we create - * the checkpoint anyway, to have the latest checkpoint fully contained in - * the new segment. This is for a little bit of extra robustness: it's - * better if you don't need to keep two WAL segments around to recover the - * checkpoint. + * If this isn't a shutdown or forced checkpoint, and if there has been no + * WAL activity, skip the checkpoint. The idea here is to avoid inserting + * duplicate checkpoints when the system is idle. That wastes log space, + * and more importantly it exposes us to possible loss of both current and + * previous checkpoint records if the machine crashes just as we're writing + * the update. */ if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { - if (prevPtr == ControlFile->checkPointCopy.redo && - prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) + if (progress_lsn == ControlFile->checkPoint) { WALInsertLockRelease(); LWLockRelease(CheckpointLock); @@ -8433,7 +8522,7 @@ CreateCheckPoint(int flags) XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint)); recptr = XLogInsert(RM_XLOG_ID, shutdown ? 
XLOG_CHECKPOINT_SHUTDOWN : - XLOG_CHECKPOINT_ONLINE); + XLOG_CHECKPOINT_ONLINE, 0); XLogFlush(recptr); @@ -8587,7 +8676,7 @@ CreateEndOfRecoveryRecord(void) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY); + recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, 0); XLogFlush(recptr); @@ -8958,7 +9047,7 @@ XLogPutNextOid(Oid nextOid) { XLogBeginInsert(); XLogRegisterData((char *) (&nextOid), sizeof(Oid)); - (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID); + (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, 0); /* * We need not flush the NEXTOID record immediately, because any of the @@ -8997,7 +9086,7 @@ RequestXLogSwitch(void) /* XLOG SWITCH has no data */ XLogBeginInsert(); - RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH); + RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, 0); return RecPtr; } @@ -9017,7 +9106,7 @@ XLogRestorePoint(const char *rpName) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point)); - RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT); + RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, 0); ereport(LOG, (errmsg("restore point \"%s\" created at %X/%X", @@ -9064,7 +9153,7 @@ XLogReportParameters(void) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE); + recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, 0); XLogFlush(recptr); } @@ -9126,7 +9215,7 @@ UpdateFullPageWrites(void) XLogBeginInsert(); XLogRegisterData((char *) (&fullPageWrites), sizeof(bool)); - XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE); + XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, 0); } if (!fullPageWrites) @@ -10439,7 +10528,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p) */ XLogBeginInsert(); XLogRegisterData((char *) (&startpoint), sizeof(startpoint)); - stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); + stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, 0); stoptli 
= ThisTimeLineID; /* diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index c37003a..bf800db 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -396,7 +396,9 @@ XLogIncludeOrigin(void) /* * Insert an XLOG record having the specified RMID and info bytes, with the * body of the record being the data and buffer references registered earlier - * with XLogRegister* calls. + * with XLogRegister* calls. 'flags' allow users to control more in-depth + * operations during WAL record insertion. As of now, this gives control on + * if the progress LSN positions are updated or not. * * Returns XLOG pointer to end of record (beginning of next record). * This can be used as LSN for data pages affected by the logged action. @@ -405,7 +407,7 @@ XLogIncludeOrigin(void) * WAL rule "write the log before the data".) */ XLogRecPtr -XLogInsert(RmgrId rmid, uint8 info) +XLogInsert(RmgrId rmid, uint8 info, uint8 flags) { XLogRecPtr EndPos; @@ -450,7 +452,7 @@ XLogInsert(RmgrId rmid, uint8 info) rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, &fpw_lsn); - EndPos = XLogInsertRecord(rdt, fpw_lsn); + EndPos = XLogInsertRecord(rdt, fpw_lsn, flags); } while (EndPos == InvalidXLogRecPtr); XLogResetInsertion(); @@ -915,7 +917,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) BufferGetTag(buffer, &rnode, &forkno, &blkno); XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags); - recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT, 0); } return recptr; @@ -946,7 +948,7 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, XLogBeginInsert(); XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags); - recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); + recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, 0); /* * The page may be uninitialized. 
If so, we can't set the LSN because that diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index fe68c99..1064600 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -134,7 +134,7 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE); + XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE, 0); } /* @@ -273,7 +273,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) XLogRegisterData((char *) &xlrec, sizeof(xlrec)); lsn = XLogInsert(RM_SMGR_ID, - XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); + XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE, 0); /* * Flush, because otherwise the truncation of the main relation might diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index c1c0223..feae710 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -630,7 +630,8 @@ createdb(const CreatedbStmt *stmt) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE, + 0); } } heap_endscan(scan); @@ -1238,7 +1239,8 @@ movedb(const char *dbname, const char *tblspcname) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE, + 0); } /* @@ -1338,7 +1340,8 @@ movedb(const char *dbname, const char *tblspcname) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); (void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE, + 0); } /* Now it's safe to release the database lock */ @@ -1879,7 +1882,8 @@ remove_dbtablespaces(Oid db_id) XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec)); 
(void) XLogInsert(RM_DBASE_ID, - XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE); + XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE, + 0); } pfree(dstpath); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index c98f981..766b542 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -388,7 +388,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) tuple->t_data, tuple->t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -472,7 +472,7 @@ AlterSequence(AlterSeqStmt *stmt) XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -749,7 +749,7 @@ nextval_internal(Oid relid) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } @@ -932,7 +932,7 @@ do_setval(Oid relid, int64 next, bool iscalled) XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec)); XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len); - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); + recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, 0); PageSetLSN(page, recptr); } diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 1ff5728..2fde0ac 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -368,7 +368,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) offsetof(xl_tblspc_create_rec, ts_path)); XLogRegisterData((char *) location, strlen(location) + 1); - (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE); + (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, 0); } /* @@ -523,7 +523,7 @@ 
DropTableSpace(DropTableSpaceStmt *stmt) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec)); - (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP); + (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, 0); } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 00f03d8..79cfd7b 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -78,12 +78,12 @@ int BgWriterDelay = 200; #define LOG_SNAPSHOT_INTERVAL_MS 15000 /* - * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid - * doing so too often or repeatedly if there has been no other write activity - * in the system. + * Last progress LSN and timestamp at which we last logged a standby + * snapshot, to avoid doing so too often or repeatedly if there has been + * no other write activity in the system. */ static TimestampTz last_snapshot_ts; -static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr; +static XLogRecPtr last_progress_lsn = InvalidXLogRecPtr; /* * Flags set by interrupt handlers for later service in the main loop. @@ -310,7 +310,7 @@ BackgroundWriterMain(void) * check whether there has been any WAL inserted since the last time * we've logged a running xacts. * - * We do this logging in the bgwriter as its the only process that is + * We do this logging in the bgwriter as it is the only process that is * run regularly and returns to its mainloop all the time. E.g. * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. @@ -319,19 +319,23 @@ BackgroundWriterMain(void) { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); + XLogRecPtr current_progress_lsn = GetProgressRecPtr(); timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS); /* - * only log if enough time has passed and some xlog record has - * been inserted. 
+ * only log if enough time has passed, some WAL activity + * has happened since the last checkpoint, and some xlog record + * has been inserted. */ if (now >= timeout && - last_snapshot_lsn != GetXLogInsertRecPtr()) + GetLastCheckpointRecPtr() < current_progress_lsn && + last_progress_lsn < current_progress_lsn) { - last_snapshot_lsn = LogStandbySnapshot(); + (void) LogStandbySnapshot(); last_snapshot_ts = now; + last_progress_lsn = current_progress_lsn; } } diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index 8c8833b..8870a27 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -362,7 +362,7 @@ replorigin_drop(RepOriginId roident) xlrec.node_id = roident; XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), sizeof(xlrec)); - XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP); + XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_DROP, 0); } /* then reset the in-memory entry */ @@ -894,7 +894,7 @@ replorigin_advance(RepOriginId node, XLogBeginInsert(); XLogRegisterData((char *) (&xlrec), sizeof(xlrec)); - XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET); + XLogInsert(RM_REPLORIGIN_ID, XLOG_REPLORIGIN_SET, 0); } /* diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 6a9bf84..4f934bb 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -952,7 +952,8 @@ LogStandbySnapshot(void) * The definitions of RunningTransactionsData and xl_xact_running_xacts * are similar. We keep them separate because xl_xact_running_xacts * is a contiguous chunk of memory and never exists fully until it is - * assembled in WAL. + * assembled in WAL. Progress of WAL activity is not updated when + * this record is logged. 
*/ static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) @@ -976,7 +977,8 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) XLogRegisterData((char *) CurrRunningXacts->xids, (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId)); - recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); + recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, + XLOG_INSERT_NO_PROGRESS); if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), @@ -1024,7 +1026,8 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); - (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); + (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, + XLOG_INSERT_NO_PROGRESS); } /* diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 6a63b5e..146ad3b 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -767,7 +767,7 @@ write_relmap_file(bool shared, RelMapFile *newmap, XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate); XLogRegisterData((char *) newmap, sizeof(RelMapFile)); - lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE); + lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, 0); /* As always, WAL must hit the disk before the data update does */ XLogFlush(lsn); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ecd30ce..7844844 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -210,7 +210,9 @@ extern CheckpointStatsData CheckpointStats; struct XLogRecData; -extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn); +extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags); extern void XLogFlush(XLogRecPtr RecPtr); extern bool XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); @@ -261,6 +263,8 @@ 
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); +extern XLogRecPtr GetProgressRecPtr(void); +extern XLogRecPtr GetLastCheckpointRecPtr(void); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void RemovePromoteSignalFiles(void); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cc0177e..271a26e 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -38,10 +38,14 @@ #define REGBUF_KEEP_DATA 0x10/* include data even if a full-page image is * taken */ +/* flags for XLogInsert */ +#define XLOG_INSERT_NO_PROGRESS 0x01 /* do not update progress LSN + * when inserting record */ + /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); extern void XLogIncludeOrigin(void); -extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); +extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, uint8 flags); extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); extern void XLogRegisterData(char *data, int len); extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5b1c361..0ebd8fb 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -436,11 +436,30 @@ typedef struct XLogwrtResult * the WAL record is just copied to the page and the lock is released. But * to avoid the deadlock-scenario explained above, the indicator is always * updated before sleeping while holding an insertion lock. + * + * The progressAt values indicate the insertion progress used to determine + * WAL insertion activity since a previous checkpoint, which is aimed at + * finding out whether a checkpoint should be skipped or whether standby + * activity should be logged. Progress position is basically updated + * for all types of records; for the time being only snapshot logging + * is out of this scope to properly skip their logging on idle systems. + * Tracking the WAL activity directly in WALInsertLock has the advantage + * of not relying on taking an exclusive lock on all the WAL insertion locks, + * hence reducing the impact of the activity lookup. This also takes + * advantage of the fact that each insert lock is located on the same + * cache line to avoid 8-byte torn reads on some platforms. + * XXX: There is still room for more improvements here, particularly + * WAL operations related to unlogged relations (INIT_FORKNUM) should not + * update the progress LSN as those relations are reset during crash + * recovery so enforcing buffers of such relations to be flushed for + * example in the case of a load only on unlogged relations is a waste + * of disk write. */ typedef struct { LWLock lock; XLogRecPtr insertingAt; + XLogRecPtr progressAt; } WALInsertLock; /* @@ -878,6 +897,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * which pages need a full-page image, and retry. If fpw_lsn is invalid, the * record is always inserted. + * + * 'flags' gives more in-depth control on the record being inserted. 
As of + * now, this controls if the progress LSN positions are updated. + * * The first XLogRecData in the chain must be for the record header, and its * data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and * xl_crc fields in the header, the rest of the header must already be filled @@ -890,7 +912,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * WAL rule "write the log before the data".) */ XLogRecPtr -XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) +XLogInsertRecord(XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags) { XLogCtlInsert *Insert = &XLogCtl->Insert; pg_crc32c rdata_crc; @@ -989,6 +1013,25 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) inserted = true; } + /* + * Update the progress LSN positions. At least one WAL insertion lock + * is already taken appropriately before doing that, and it is just more + * simple to do that here where WAL record data and type is at hand. + * The progress is set at the start position of the record tracked that + * is being added, making easier checkpoint progress tracking as the + * control file already saves the start LSN position of the last + * checkpoint run. If an exclusive lock is taken for WAL insertion, + * there is actually no need to update all the progression fields, so + * just do it on the first one. 
+ */ + if ((flags & XLOG_INSERT_NO_PROGRESS) == 0) + { + if (holdingAllLocks) + WALInsertLocks[0].l.progressAt = StartPos; + else + WALInsertLocks[MyLockNo].l.progressAt = StartPos; + } + if (inserted) { /* @@ -4714,6 +4757,7 @@ XLOGShmemInit(void) { LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT); WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr; + WALInsertLocks[i].l.progressAt = InvalidXLogRecPtr; } /* @@ -7887,6 +7931,55 @@ GetFlushRecPtr(void) } /* + * GetProgressRecPtr -- Returns the newest WAL activity position, aimed + * at the last significant WAL activity, or in other words any activity + * not referring to standby logging as of now. Finding the last activity + * position is done by scanning each WAL insertion lock by taking directly + * the light-weight lock associated to it. + */ +XLogRecPtr +GetProgressRecPtr(void) +{ + XLogRecPtr res = InvalidXLogRecPtr; + int i; + + /* + * Look at the latest LSN position referring to the activity done by + * WAL insertion. An exclusive lock is taken because currently the + * locking logic for WAL insertion only expects such a level of locking. + * Taking a lock is as well necessary to prevent potential torn reads + * on some platforms. + */ + for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++) + { + XLogRecPtr progress_lsn; + + LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE); + progress_lsn = WALInsertLocks[i].l.progressAt; + LWLockRelease(&WALInsertLocks[i].l.lock); + + if (res < progress_lsn) + res = progress_lsn; + } + + return res; +} + +/* + * GetLastCheckpointRecPtr -- Returns the last checkpoint insert position. 
+ */ +XLogRecPtr +GetLastCheckpointRecPtr(void) +{ + XLogRecPtr ckpt_lsn; + + LWLockAcquire(ControlFileLock, LW_SHARED); + ckpt_lsn = ControlFile->checkPoint; + LWLockRelease(ControlFileLock); + return ckpt_lsn; +} + +/* * Get the time of the last xlog segment switch */ pg_time_t @@ -8146,7 +8239,7 @@ CreateCheckPoint(int flags) uint32 freespace; XLogRecPtr PriorRedoPtr; XLogRecPtr curInsert; - XLogRecPtr prevPtr; + XLogRecPtr progress_lsn; VirtualTransactionId *vxids; int nvxids; @@ -8227,34 +8320,30 @@ CreateCheckPoint(int flags) checkPoint.oldestActiveXid = InvalidTransactionId; /* + * Get progress before acquiring insert locks to shorten the locked + * section waiting ahead. + */ + progress_lsn = GetProgressRecPtr(); + + /* * We must block concurrent insertions while examining insert state to * determine the checkpoint REDO pointer. */ WALInsertLockAcquireExclusive(); curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); - prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos); /* - * If this isn't a shutdown or forced checkpoint, and we have not inserted - * any XLOG records since the start of the last checkpoint, skip the - * checkpoint. The idea here is to avoid inserting duplicate checkpoints - * when the system is idle. That wastes log space, and more importantly it - * exposes us to possible loss of both current and previous checkpoint - * records if the machine crashes just as we're writing the update. - * (Perhaps it'd make even more sense to checkpoint only when the previous - * checkpoint record is in a different xlog page?) - * - * If the previous checkpoint crossed a WAL segment, however, we create - * the checkpoint anyway, to have the latest checkpoint fully contained in - * the new segment. This is for a little bit of extra robustness: it's - * better if you don't need to keep two WAL segments around to recover the - * checkpoint. + * If this isn't a shutdown or forced checkpoint, and if there has been no + * WAL activity, skip the checkpoint. 
The idea here is to avoid inserting + * duplicate checkpoints when the system is idle. That wastes log space, + * and more importantly it exposes us to possible loss of both current and + * previous checkpoint records if the machine crashes just as we're writing + * the update. */ if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { - if (prevPtr == ControlFile->checkPointCopy.redo && - prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) + if (progress_lsn == ControlFile->checkPoint) { WALInsertLockRelease(); LWLockRelease(CheckpointLock); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index c37003a..7cddc55 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -394,9 +394,23 @@ XLogIncludeOrigin(void) } /* + * XLogInsert + * + * A shorthand for XLogInsertExtended, to update the progress of WAL + * activity by default. + */ +XLogRecPtr +XLogInsert(RmgrId rmid, uint8 info) +{ + return XLogInsertExtended(rmid, info, 0); +} + +/* * Insert an XLOG record having the specified RMID and info bytes, with the * body of the record being the data and buffer references registered earlier - * with XLogRegister* calls. + * with XLogRegister* calls. 'flags' allow users to control more in-depth + * operations during WAL record insertion. As of now, this gives control on + * if the progress LSN positions are updated or not. * * Returns XLOG pointer to end of record (beginning of next record). * This can be used as LSN for data pages affected by the logged action. @@ -405,7 +419,7 @@ XLogIncludeOrigin(void) * WAL rule "write the log before the data".) 
*/ XLogRecPtr -XLogInsert(RmgrId rmid, uint8 info) +XLogInsertExtended(RmgrId rmid, uint8 info, uint8 flags) { XLogRecPtr EndPos; @@ -450,7 +464,7 @@ XLogInsert(RmgrId rmid, uint8 info) rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, &fpw_lsn); - EndPos = XLogInsertRecord(rdt, fpw_lsn); + EndPos = XLogInsertRecord(rdt, fpw_lsn, flags); } while (EndPos == InvalidXLogRecPtr); XLogResetInsertion(); diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 00f03d8..79cfd7b 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -78,12 +78,12 @@ int BgWriterDelay = 200; #define LOG_SNAPSHOT_INTERVAL_MS 15000 /* - * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid - * doing so too often or repeatedly if there has been no other write activity - * in the system. + * Last progress LSN and timestamp at which we last logged a standby + * snapshot, to avoid doing so too often or repeatedly if there has been + * no other write activity in the system. */ static TimestampTz last_snapshot_ts; -static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr; +static XLogRecPtr last_progress_lsn = InvalidXLogRecPtr; /* * Flags set by interrupt handlers for later service in the main loop. @@ -310,7 +310,7 @@ BackgroundWriterMain(void) * check whether there has been any WAL inserted since the last time * we've logged a running xacts. * - * We do this logging in the bgwriter as its the only process that is + * We do this logging in the bgwriter as it is the only process that is * run regularly and returns to its mainloop all the time. E.g. * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. 
@@ -319,19 +319,23 @@ BackgroundWriterMain(void) { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); + XLogRecPtr current_progress_lsn = GetProgressRecPtr(); timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS); /* - * only log if enough time has passed and some xlog record has - * been inserted. + * only log if enough time has passed, some WAL activity + * has happened since the last checkpoint, and some xlog record + * has been inserted. */ if (now >= timeout && - last_snapshot_lsn != GetXLogInsertRecPtr()) + GetLastCheckpointRecPtr() < current_progress_lsn && + last_progress_lsn < current_progress_lsn) { - last_snapshot_lsn = LogStandbySnapshot(); + (void) LogStandbySnapshot(); last_snapshot_ts = now; + last_progress_lsn = current_progress_lsn; } } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 6a9bf84..75c367c 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -952,7 +952,8 @@ LogStandbySnapshot(void) * The definitions of RunningTransactionsData and xl_xact_running_xacts * are similar. We keep them separate because xl_xact_running_xacts * is a contiguous chunk of memory and never exists fully until it is - * assembled in WAL. + * assembled in WAL. Progress of WAL activity is not updated when + * this record is logged. 
*/ static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) @@ -976,7 +977,9 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) XLogRegisterData((char *) CurrRunningXacts->xids, (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId)); - recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS); + recptr = XLogInsertExtended(RM_STANDBY_ID, + XLOG_RUNNING_XACTS, + XLOG_INSERT_NO_PROGRESS); if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), @@ -1024,7 +1027,8 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks) XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks)); XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock)); - (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK); + (void) XLogInsertExtended(RM_STANDBY_ID, XLOG_STANDBY_LOCK, + XLOG_INSERT_NO_PROGRESS); } /* diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ecd30ce..7844844 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -210,7 +210,9 @@ extern CheckpointStatsData CheckpointStats; struct XLogRecData; -extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn); +extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, + XLogRecPtr fpw_lsn, + uint8 flags); extern void XLogFlush(XLogRecPtr RecPtr); extern bool XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); @@ -261,6 +263,8 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); +extern XLogRecPtr GetProgressRecPtr(void); +extern XLogRecPtr GetLastCheckpointRecPtr(void); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void RemovePromoteSignalFiles(void); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cc0177e..6ff0176 100644 --- a/src/include/access/xloginsert.h +++ 
b/src/include/access/xloginsert.h @@ -38,10 +38,15 @@ #define REGBUF_KEEP_DATA 0x10/* include data even if a full-page image is * taken */ +/* flags for XLogInsertExtended */ +#define XLOG_INSERT_NO_PROGRESS 0x01 /* do not update progress LSN + * when inserting record */ + /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); extern void XLogIncludeOrigin(void); extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); +extern XLogRecPtr XLogInsertExtended(RmgrId rmid, uint8 info, uint8 flags); extern void XLogEnsureRecordSpace(int nbuffers, int ndatas); extern void XLogRegisterData(char *data, int len); extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers