On Sat, Jan 30, 2016 at 11:08 PM, Michael Paquier <michael.paqu...@gmail.com> wrote: > On Fri, Jan 29, 2016 at 9:13 PM, Michael Paquier wrote: >> Well, to put it short, I am just trying to find a way to make the >> backend skip unnecessary checkpoints on an idle system, which results >> in the following WAL pattern if system is completely idle: >> CHECKPOINT_ONLINE >> RUNNING_XACTS >> RUNNING_XACTS >> [etc..] >> >> The thing is that I am lost with the meaning of this condition to >> decide if a checkpoint should be skipped or not: >> if (prevPtr == ControlFile->checkPointCopy.redo && >> prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) >> { >> WALInsertLockRelease(); >> LWLockRelease(CheckpointLock); >> return; >> } >> As at least one standby snapshot is logged before the checkpoint >> record, the redo position is never going to match the previous insert >> LSN, so checkpoints will never be skipped if wal_level >= hot_standby. >> Skipping such unnecessary checkpoints is what you would like to >> address, no? Because that's what I would like to do here first. And >> once we got that right, we could think about addressing the case where >> WAL segments are forcibly archived for idle systems. > > I have put a bit more of brain power into that, and finished with the > patch attached. A new field called chkpProgressLSN is attached to > XLogCtl, which is to the current insert position of the checkpoint > when wal_level <= archive, or the LSN position of the standby snapshot > taken after a checkpoint. The bgwriter code is modified as well so as > it uses this progress LSN and compares it with the current insert LSN > to determine if a standby snapshot should be logged or not. On an > instance of Postgres completely idle, no useless checkpoints, and no > useless standby snapshots are generated anymore.
Attached is an additional patch that I have used for my tests (should have sent that yesterday). This just shows up a couple of logs in the bgwriter and around checkpoint to see in more details their activity with not that much noise. -- Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ccef3f0..e51b97e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8259,12 +8259,20 @@ CreateCheckPoint(int flags) * better if you don't need to keep two WAL segments around to recover the * checkpoint. */ + elog(LOG, "prevPtr %X/%X, curInsert %X/%X, progress %X/%X, redo %X/%X", + (uint32) (prevPtr >> 32), (uint32) prevPtr, + (uint32) (curInsert >> 32), (uint32) curInsert, + (uint32) (GetProgressRecPtr() >> 32), (uint32) GetProgressRecPtr(), + (uint32) (ControlFile->checkPointCopy.redo >> 32), + (uint32) ControlFile->checkPointCopy.redo); if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { + elog(LOG, "Not a forced or shutdown checkpoint"); if (GetProgressRecPtr() == prevPtr && prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) { + elog(LOG, "Checkpoint is skipped"); WALInsertLockRelease(); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); @@ -8437,7 +8445,11 @@ CreateCheckPoint(int flags) * snapshots need to be logged, and skip them on idle systems. */ if (!shutdown && XLogStandbyInfoActive()) + { progress_lsn = LogStandbySnapshot(); + elog(LOG, "snapshot taken by checkpoint %X/%X", + (uint32) (progress_lsn >> 32), (uint32) progress_lsn); + } START_CRIT_SECTION(); diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 22374b0..d55e446 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -308,17 +308,25 @@ BackgroundWriterMain(void) { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); + XLogRecPtr progress_lsn = GetProgressRecPtr(); + XLogRecPtr insert_lsn = GetInsertRecPtr(); timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS); + elog(LOG, "bgwriter progress_lsn %X/%X, insert_lsn %X/%X", + (uint32) (progress_lsn >> 32), (uint32) progress_lsn, + (uint32) (insert_lsn >> 32), (uint32) insert_lsn); /* * only log if enough time has passed. */ if (now >= timeout && - GetProgressRecPtr() != GetInsertRecPtr()) + progress_lsn != insert_lsn) { - (void) LogStandbySnapshot(); + XLogRecPtr lsn; + lsn = LogStandbySnapshot(); + elog(LOG, "snapshot taken by bgwriter %X/%X", + (uint32) (lsn >> 32), (uint32) lsn); last_snapshot_ts = now; } }
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a2846c4..ccef3f0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -527,6 +527,8 @@ typedef struct XLogCtlData TransactionId ckptXid; XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */ XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any slot */ + XLogRecPtr ckptProgressLSN; /* LSN tracking progress of checkpoint + * and standby snapshots */ XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG * segment */ @@ -6314,6 +6316,7 @@ StartupXLOG(void) checkPoint.newestCommitTsXid); XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; + XLogCtl->ckptProgressLSN = checkPoint.redo; /* * Initialize replication slots, before there's a chance to remove @@ -7869,6 +7872,22 @@ GetFlushRecPtr(void) } /* + * GetProgressRecPtr -- Returns the reference position of last checkpoint, + * taking into account standby snapshots generated by checkpoints. + */ +XLogRecPtr +GetProgressRecPtr(void) +{ + XLogRecPtr progress_lsn; + + SpinLockAcquire(&XLogCtl->info_lck); + progress_lsn = XLogCtl->ckptProgressLSN; + SpinLockRelease(&XLogCtl->info_lck); + + return progress_lsn; +} + +/* * Get the time of the last xlog segment switch */ pg_time_t @@ -8133,6 +8152,7 @@ CreateCheckPoint(int flags) XLogRecPtr PriorRedoPtr; XLogRecPtr curInsert; XLogRecPtr prevPtr; + XLogRecPtr progress_lsn; VirtualTransactionId *vxids; int nvxids; @@ -8214,10 +8234,13 @@ CreateCheckPoint(int flags) /* * We must block concurrent insertions while examining insert state to - * determine the checkpoint REDO pointer. + * determine the checkpoint REDO pointer. The progress LSN of this + * to-be-created checkpoint is for now initialized as the current insert + * position in WAL. This will get updated later on depending on if + * a standby snapshot is logged. */ WALInsertLockAcquireExclusive(); - curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos); + curInsert = progress_lsn = XLogBytePosToRecPtr(Insert->CurrBytePos); prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos); /* @@ -8239,7 +8262,7 @@ CreateCheckPoint(int flags) if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_FORCE)) == 0) { - if (prevPtr == ControlFile->checkPointCopy.redo && + if (GetProgressRecPtr() == prevPtr && prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE) { WALInsertLockRelease(); @@ -8406,9 +8429,15 @@ CreateCheckPoint(int flags) * * If we are shutting down, or Startup process is completing crash * recovery we don't need to write running xact data. + * + * progress_lsn is aimed at tracking the WAL progress that happens + * since this checkpoint. If a standby snapshot is logged, its record + * postion need to be taken into account in the progress LSN position + * that is used to evaluate if checkpoints are necessary or standby + * snapshots need to be logged, and skip them on idle systems. */ if (!shutdown && XLogStandbyInfoActive()) - LogStandbySnapshot(); + progress_lsn = LogStandbySnapshot(); START_CRIT_SECTION(); @@ -8478,10 +8507,15 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* + * Update shared-memory copy of checkpoint XID/epoch and the LSN + * record tracking progress of checkpoint and standby snapshot + * activity. + */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; + XLogCtl->ckptProgressLSN = progress_lsn; SpinLockRelease(&XLogCtl->info_lck); /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 010b5fc..22374b0 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -78,12 +78,10 @@ int BgWriterDelay = 200; #define LOG_SNAPSHOT_INTERVAL_MS 15000 /* - * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid - * doing so too often or repeatedly if there has been no other write activity - * in the system. + * Timestamp at which we last issued a LogStandbySnapshot(), to avoid doing + * so too often. */ static TimestampTz last_snapshot_ts; -static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr; /* * Flags set by interrupt handlers for later service in the main loop. @@ -301,8 +299,8 @@ BackgroundWriterMain(void) * check whether there has been any WAL inserted since the last time * we've logged a running xacts. * - * We do this logging in the bgwriter as its the only process thats - * run regularly and returns to its mainloop all the time. E.g. + * We do this logging in the bgwriter as it is the only process that + * is run regularly and returns to its mainloop all the time. E.g. * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. */ @@ -315,13 +313,12 @@ BackgroundWriterMain(void) LOG_SNAPSHOT_INTERVAL_MS); /* - * only log if enough time has passed and some xlog record has - * been inserted. + * only log if enough time has passed. */ if (now >= timeout && - last_snapshot_lsn != GetXLogInsertRecPtr()) + GetProgressRecPtr() != GetInsertRecPtr()) { - last_snapshot_lsn = LogStandbySnapshot(); + (void) LogStandbySnapshot(); last_snapshot_ts = now; } } diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ecd30ce..41cb512 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -261,6 +261,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); +extern XLogRecPtr GetProgressRecPtr(void); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void RemovePromoteSignalFiles(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers