Andres Freud wrote: > On 2015-11-20 16:11:15 +0900, Michael Paquier wrote: >> diff --git a/src/backend/access/transam/xlog.c >> b/src/backend/access/transam/xlog.c >> index cc845d2..4883697 100644 >> --- a/src/backend/access/transam/xlog.c >> +++ b/src/backend/access/transam/xlog.c >> @@ -9503,6 +9503,14 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) >> data += sizeof(BkpBlock); >> >> RestoreBackupBlockContents(lsn, bkpb, data, false, false); >> + >> + if (bkpb.fork == INIT_FORKNUM) >> + { >> + SMgrRelation srel; >> + srel = smgropen(bkpb.node, InvalidBackendId); >> + smgrimmedsync(srel, INIT_FORKNUM); >> + smgrclose(srel); >> + } >> } >> else if (info == XLOG_BACKUP_END) >> { > > A smgrwrite() instead of a smgrimmedsync() should be sufficient here.
Check. On Fri, Dec 4, 2015 at 6:35 AM, Andres Freund <and...@anarazel.de> wrote: > On 2015-11-27 16:59:20 +0900, Michael Paquier wrote: >> Attached is a patch that fixes the issue for me in master and 9.5. >> Actually in the last patch I forgot a call to smgrwrite to ensure that >> the INIT_FORKNUM is correctly synced to disk when those pages are >> replayed at recovery, letting the reset routines for unlogged >> relations do their job correctly. I have noticed as well that we need >> to do the same for gin and brin relations. In this case I think that >> we could limit the flush to unlogged relations, my patch does it >> unconditionally though to generalize the logic. Thoughts? > > I think it's a good idea to limit this to unlogged relations. For a > dataset that fits into shared_buffers and creates many relations, the > additional disk writes could be noticeable. OK, then I have switched the patch this way to limit the flush to unlogged relations where needed. >> diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c >> index 99337b0..d7964ac 100644 >> --- a/src/backend/access/brin/brin.c >> +++ b/src/backend/access/brin/brin.c >> @@ -682,7 +682,12 @@ brinbuildempty(PG_FUNCTION_ARGS) >> brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index), >> BRIN_CURRENT_VERSION); >> MarkBufferDirty(metabuf); >> - log_newpage_buffer(metabuf, false); >> + /* >> + * When this full page image is replayed, there is no guarantee that >> + * this page will be present to disk when replayed particularly for >> + * unlogged relations, hence enforce it to be flushed to disk. >> + */ > > The grammar seems a bit off here. Check. >> + /* >> + * Initialize and xlog metabuffer and root buffer. When those full >> + * pages are replayed, it is not guaranteed that those relation >> + * init forks will be flushed to disk after replaying them, hence >> + * enforce those pages to be flushed to disk at replay, only the >> + * last record will enforce a flush for performance reasons and >> + * because it is actually unnecessary to do it multiple times. >> + */ > > That comment needs some love too. I think it really only makes sense if > you know the current state. There's also some language polishing needed. Check. >> @@ -450,14 +450,21 @@ ginbuildempty(PG_FUNCTION_ARGS) >> START_CRIT_SECTION(); >> GinInitMetabuffer(MetaBuffer); >> MarkBufferDirty(MetaBuffer); >> - log_newpage_buffer(MetaBuffer, false); >> + log_newpage_buffer(MetaBuffer, false, false); >> GinInitBuffer(RootBuffer, GIN_LEAF); >> MarkBufferDirty(RootBuffer); >> - log_newpage_buffer(RootBuffer, false); >> + log_newpage_buffer(RootBuffer, false, true); >> END_CRIT_SECTION(); >> > Why isn't the metapage flushed to disk? I was not sure if we should only flush only at the last page, as pages just before would be already replayed. Switched. >> --- a/src/backend/access/spgist/spginsert.c >> +++ b/src/backend/access/spgist/spginsert.c >> @@ -173,7 +173,7 @@ spgbuildempty(PG_FUNCTION_ARGS) >> (char *) page, true); >> if (XLogIsNeeded()) >> log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, >> - SPGIST_METAPAGE_BLKNO, page, false); >> + SPGIST_METAPAGE_BLKNO, page, false, >> false); >> >> /* Likewise for the root page. */ >> SpGistInitPage(page, SPGIST_LEAF); >> @@ -183,7 +183,7 @@ spgbuildempty(PG_FUNCTION_ARGS) >> (char *) page, true); >> if (XLogIsNeeded()) >> log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, >> - SPGIST_ROOT_BLKNO, page, true); >> + SPGIST_ROOT_BLKNO, page, true, false); >> > > I don't think it's correct to not flush in these cases. Yes, the primary > does an smgrimmedsync() - but that's not done on the standby. OK. Switched. >> @@ -9392,9 +9396,28 @@ xlog_redo(XLogReaderState *record) >> * when checksums are enabled. There is no difference in >> handling >> * XLOG_FPI and XLOG_FPI_FOR_HINT records, they use a >> different info >> * code just to distinguish them for statistics purposes. >> + * >> + * XLOG_FPI_FOR_SYNC records are generated when a relation >> needs to >> + * be sync'ed just after replaying a full page. This is >> important >> + * to match the master behavior in the case where a page is >> written >> + * directly to disk without going through shared buffers, >> particularly >> + * when writing init forks for index relations. >> */ > > How about > > FPI_FOR_SYNC records indicate that the page immediately needs to be > written to disk, not just to shared buffers. This is important if the > on-disk state is to be the authoritative, not the state in shared > buffers. E.g. because on-disk files may later be copied directly. OK. >> diff --git a/src/backend/commands/tablecmds.c >> b/src/backend/commands/tablecmds.c >> index 0ddde72..eb22a51 100644 >> --- a/src/backend/commands/tablecmds.c >> +++ b/src/backend/commands/tablecmds.c >> @@ -9920,7 +9920,8 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, >> * space. >> */ >> if (use_wal) >> - log_newpage(&dst->smgr_rnode.node, forkNum, blkno, >> page, false); >> + log_newpage(&dst->smgr_rnode.node, forkNum, blkno, >> page, >> + false, false); > > Shouldn't this flush data if it's an init fork? Currently that's an > academic distinction, given there'll never be a page, but it still seems > prudent. Yeah it should do that for all the INIT_FORKNUM because we don't know the page type here. Note that use_wal is broken as well. We had better log and flush the INIT_FORKNUM as well for unlogged relations. >> extern void InitXLogInsert(void); >> diff --git a/src/include/catalog/pg_control.h >> b/src/include/catalog/pg_control.h >> index ad1eb4b..91445f1 100644 >> --- a/src/include/catalog/pg_control.h >> +++ b/src/include/catalog/pg_control.h >> @@ -73,6 +73,7 @@ typedef struct CheckPoint >> #define XLOG_END_OF_RECOVERY 0x90 >> #define XLOG_FPI_FOR_HINT 0xA0 >> #define XLOG_FPI 0xB0 >> +#define XLOG_FPI_FOR_SYNC 0xC0 > > > I'm not a big fan of the XLOG_FPI_FOR_SYNC name. Syncing is a bit too > ambigous for my taste. How about either naming it XLOG_FPI_FLUSH or > instead adding actual record data and a 'flags' field in there? I > slightly prefer the latter - XLOG_FPI and XLOG_FPI_FOR_HINT really are > different, XLOG_FPI_FOR_SYNC not so much. Let's go for XLOG_FPI_FLUSH. Attached is an updated patch. For back branches, I am still not sure what would be a good idea though. I don't see any other initiative than flushing unconditionally INIT_FORKNUM when replaying an FPI... But that would impact some systems severely. Regards, -- Michael
From 65d93a69df59512fab217a6a34a80ed15b33ee27 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@otacoo.com> Date: Fri, 4 Dec 2015 16:58:23 +0900 Subject: [PATCH] Ensure consistent on-disk state of UNLOGGED indexes at recovery Unlogged relation indexes need to have a consistent initial state on-disk at the time of replay to ensure that their replayed pages are found on disk should end of recovery happen and subsequently reset those relations. This commit adds a new XLOG record, named XLOG_FLUSH, that allows to enforce a page just replayed to be flushed to disk, ensuring that those relations will have a consistent state when replayed. All types of relation indexes whose persistence is unlogged are impacted by the bug this commit fixes, with various degrees of problems, most of them causing errors on promoted standbys when trying to INSERT new tuples to their parent relations. The worst problem found was with GIN indexes, where trying to insert a new tuple in it caused the system to remain stuck on a semaphore lock, making the system unresponsive. --- src/backend/access/brin/brin.c | 10 +++++++++- src/backend/access/brin/brin_pageops.c | 2 +- src/backend/access/gin/gininsert.c | 14 +++++++++++--- src/backend/access/gist/gist.c | 3 ++- src/backend/access/heap/rewriteheap.c | 6 ++++-- src/backend/access/nbtree/nbtree.c | 2 +- src/backend/access/nbtree/nbtsort.c | 3 ++- src/backend/access/rmgrdesc/xlogdesc.c | 7 ++++++- src/backend/access/spgist/spginsert.c | 6 +++--- src/backend/access/transam/xlog.c | 28 ++++++++++++++++++++++++++-- src/backend/access/transam/xloginsert.c | 14 ++++++++++---- src/backend/commands/tablecmds.c | 15 +++++++++++---- src/backend/commands/vacuumlazy.c | 2 +- src/backend/replication/logical/decode.c | 1 + src/include/access/xloginsert.h | 6 ++++-- src/include/catalog/pg_control.h | 1 + 16 files changed, 93 insertions(+), 27 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 99337b0..b646101 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -682,7 +682,15 @@ brinbuildempty(PG_FUNCTION_ARGS) brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index), BRIN_CURRENT_VERSION); MarkBufferDirty(metabuf); - log_newpage_buffer(metabuf, false); + + /* + * For unlogged relations, this page should be immediately flushed + * to disk after being replayed. This is necessary to ensure that the + * initial on-disk state of unlogged relations is preserved when + * they get reset at the end of recovery. + */ + log_newpage_buffer(metabuf, false, + index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); END_CRIT_SECTION(); UnlockReleaseBuffer(metabuf); diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index f876f62..572fe20 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -865,7 +865,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer) page = BufferGetPage(buffer); brin_page_init(page, BRIN_PAGETYPE_REGULAR); MarkBufferDirty(buffer); - log_newpage_buffer(buffer, true); + log_newpage_buffer(buffer, true, false); END_CRIT_SECTION(); /* diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 49e9185..17c168a 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -450,14 +450,22 @@ ginbuildempty(PG_FUNCTION_ARGS) ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE); - /* Initialize and xlog metabuffer and root buffer. */ + /* + * Initialize and xlog metabuffer and root buffer. For unlogged + * relations, those pages need to be immediately flushed to disk + * after being replayed. This is necessary to ensure that the + * initial on-disk state of unlogged relations is preserved when + * they get reset at the end of recovery. + */ START_CRIT_SECTION(); GinInitMetabuffer(MetaBuffer); MarkBufferDirty(MetaBuffer); - log_newpage_buffer(MetaBuffer, false); + log_newpage_buffer(MetaBuffer, false, + index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); GinInitBuffer(RootBuffer, GIN_LEAF); MarkBufferDirty(RootBuffer); - log_newpage_buffer(RootBuffer, false); + log_newpage_buffer(RootBuffer, false, + index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); END_CRIT_SECTION(); /* Unlock and release the buffers. */ diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 53bccf6..6a20031 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -84,7 +84,8 @@ gistbuildempty(PG_FUNCTION_ARGS) START_CRIT_SECTION(); GISTInitBuffer(buffer, F_LEAF); MarkBufferDirty(buffer); - log_newpage_buffer(buffer, true); + log_newpage_buffer(buffer, true, + index->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); END_CRIT_SECTION(); /* Unlock and release the buffer */ diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 6a6fc3b..e9a9a8f 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -336,7 +336,8 @@ end_heap_rewrite(RewriteState state) MAIN_FORKNUM, state->rs_blockno, state->rs_buffer, - true); + true, + false); RelationOpenSmgr(state->rs_new_rel); PageSetChecksumInplace(state->rs_buffer, state->rs_blockno); @@ -685,7 +686,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) MAIN_FORKNUM, state->rs_blockno, page, - true); + true, + false); /* * Now write the page. We say isTemp = true even if it's not a diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index cf4a6dc..d211a98 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -206,7 +206,7 @@ btbuildempty(PG_FUNCTION_ARGS) (char *) metapage, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - BTREE_METAPAGE, metapage, false); + BTREE_METAPAGE, metapage, false, true); /* * An immediate sync is required even if we xlog'd the page, because the diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index f95f67a..faf611c 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -277,7 +277,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) if (wstate->btws_use_wal) { /* We use the heap NEWPAGE record type for this */ - log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true); + log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, + true, false); } /* diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 83cc9e8..a5abf32 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -77,7 +77,9 @@ xlog_desc(StringInfo buf, XLogReaderState *record) appendStringInfoString(buf, xlrec->rp_name); } - else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) + else if (info == XLOG_FPI || + info == XLOG_FPI_FOR_HINT || + info == XLOG_FPI_FLUSH) { /* no further information to print */ } @@ -181,6 +183,9 @@ xlog_identify(uint8 info) case XLOG_FPI_FOR_HINT: id = "FPI_FOR_HINT"; break; + case XLOG_FPI_FLUSH: + id = "FPI_FOR_SYNC"; + break; } return id; diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index bceee8d..0758bfd 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -173,7 +173,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_METAPAGE_BLKNO, page, false); + SPGIST_METAPAGE_BLKNO, page, false, true); /* Likewise for the root page. */ SpGistInitPage(page, SPGIST_LEAF); @@ -183,7 +183,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_ROOT_BLKNO, page, true); + SPGIST_ROOT_BLKNO, page, true, true); /* Likewise for the null-tuples root page. */ SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS); @@ -193,7 +193,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_NULL_BLKNO, page, true); + SPGIST_NULL_BLKNO, page, true, true); /* * An immediate sync is required even if we xlog'd the pages, because the diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 86debf4..3640f31 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9188,7 +9188,9 @@ xlog_redo(XLogReaderState *record) XLogRecPtr lsn = record->EndRecPtr; /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */ - Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT || + Assert(info == XLOG_FPI || + info == XLOG_FPI_FOR_HINT || + info == XLOG_FPI_FLUSH || !XLogRecHasAnyBlockRefs(record)); if (info == XLOG_NEXTOID) @@ -9378,7 +9380,9 @@ xlog_redo(XLogReaderState *record) { /* nothing to do here */ } - else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) + else if (info == XLOG_FPI || + info == XLOG_FPI_FOR_HINT || + info == XLOG_FPI_FLUSH) { Buffer buffer; @@ -9396,9 +9400,29 @@ xlog_redo(XLogReaderState *record) * when checksums are enabled. There is no difference in handling * XLOG_FPI and XLOG_FPI_FOR_HINT records, they use a different info * code just to distinguish them for statistics purposes. + * + * FPI_FOR_FLUSH records indicate that the page immediately needs to + * be written to disk, not just to shared buffers. This is important + * if the on-disk state is to be the authoritative, not the state + * in shared buffers. E.g. because on-disk files may later be copied + * directly. */ if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED) elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block"); + + if (info == XLOG_FPI_FLUSH) + { + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + SMgrRelation srel; + + (void) XLogRecGetBlockTag(record, 0, &rnode, &forknum, &blkno); + srel = smgropen(rnode, InvalidBackendId); + smgrwrite(srel, forknum, blkno, BufferGetPage(buffer), false); + smgrclose(srel); + } + UnlockReleaseBuffer(buffer); } else if (info == XLOG_BACKUP_END) diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 925255f..15ed453 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -932,10 +932,13 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) * If the page follows the standard page layout, with a PageHeader and unused * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows * the unused space to be left out from the WAL record, making it smaller. + * + * If 'is_flush' is set to TRUE, relation will be requested to flush + * immediately its data at replay after replaying this full page image. */ XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, - Page page, bool page_std) + Page page, bool page_std, bool is_flush) { int flags; XLogRecPtr recptr; @@ -946,7 +949,7 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, XLogBeginInsert(); XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags); - recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); + recptr = XLogInsert(RM_XLOG_ID, is_flush ? XLOG_FPI_FLUSH : XLOG_FPI); /* * The page may be uninitialized. If so, we can't set the LSN because that @@ -969,9 +972,12 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, * If the page follows the standard page layout, with a PageHeader and unused * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows * the unused space to be left out from the WAL record, making it smaller. + * + * If 'is_flush' is set to TRUE, relation will be requested to flush + * immediately its data at replay after replaying this full page image. */ XLogRecPtr -log_newpage_buffer(Buffer buffer, bool page_std) +log_newpage_buffer(Buffer buffer, bool page_std, bool is_flush) { Page page = BufferGetPage(buffer); RelFileNode rnode; @@ -983,7 +989,7 @@ log_newpage_buffer(Buffer buffer, bool page_std) BufferGetTag(buffer, &rnode, &forkNum, &blkno); - return log_newpage(&rnode, forkNum, blkno, page, page_std); + return log_newpage(&rnode, forkNum, blkno, page, page_std, is_flush); } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 0ddde72..dbe7ed9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -9892,9 +9892,14 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* * We need to log the copied data in WAL iff WAL archiving/streaming is - * enabled AND it's a permanent relation. + * enabled AND it's a permanent relation. Unlogged relations need to have + * their INIT_FORKNUM logged as well, and flushed at replay to ensure a + * consistent on-disk state when reset at the end of recovery. */ - use_wal = XLogIsNeeded() && relpersistence == RELPERSISTENCE_PERMANENT; + use_wal = XLogIsNeeded() && + (relpersistence == RELPERSISTENCE_PERMANENT || + (relpersistence == RELPERSISTENCE_UNLOGGED && + forkNum == INIT_FORKNUM)); nblocks = smgrnblocks(src, forkNum); @@ -9917,10 +9922,12 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* * WAL-log the copied page. Unfortunately we don't know what kind of a * page this is, so we have to log the full page including any unused - * space. + * space. For the same reason, pages part of INIT_FORKNUM are always + * forcibly flushed at replay. */ if (use_wal) - log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false); + log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, + false, forkNum == INIT_FORKNUM); PageSetChecksumInplace(page, blkno); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 2429889..b0e3901 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -736,7 +736,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, */ if (RelationNeedsWAL(onerel) && PageGetLSN(page) == InvalidXLogRecPtr) - log_newpage_buffer(buf, true); + log_newpage_buffer(buf, true, false); PageSetAllVisible(page); visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 9f60687..ba9bb36 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -174,6 +174,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) case XLOG_FPW_CHANGE: case XLOG_FPI_FOR_HINT: case XLOG_FPI: + case XLOG_FPI_FLUSH: break; default: elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 31b45ba..491caa5 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -53,8 +53,10 @@ extern void XLogResetInsertion(void); extern bool XLogCheckBufferNeedsBackup(Buffer buffer); extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, - BlockNumber blk, char *page, bool page_std); -extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std); + BlockNumber blk, char *page, bool page_std, + bool is_flush); +extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std, + bool is_flush); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); extern void InitXLogInsert(void); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index ad1eb4b..9719980 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -73,6 +73,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 +#define XLOG_FPI_FLUSH 0xC0 /* -- 2.6.3
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers