Hi all, On HEAD, xlog.c has the following comment, which has been on my own TODO list for a couple of weeks now:
* TODO: With a bit of extra work we could just start with a pgstat file * associated with the checkpoint redo location we're starting from. Please find a patch series to implement that, giving the possibility to keep statistics after a crash rather than discard them. I have been looking at the code for a while, before settling down on: - Forcing the flush of the pgstats file to happen during non-shutdown checkpoint and restart points, after updating the control file's redo LSN and the critical sections in the area. - Leaving the current before_shmem_exit() callback around, as a matter of delaying the flush of the stats for as long as possible in a shutdown sequence. This also makes the single-user mode shutdown simpler. - Adding the redo LSN to the pgstats file, with a bump of PGSTAT_FILE_FORMAT_ID, cross-checked with the redo LSN. This change is independently useful on its own when loading stats after a clean startup, as well. - The crash recovery case is simplified, as there is no more need for the "discard" code path. - Not using a logic where I would need to stick a LSN into the stats file name, implying that we would need a potential lookup at the contents of pg_stat/ to clean up past files at crash recovery. These should not be costly, but I'd rather not add more of these. 7ff23c6d277d, that has removed the last call of CreateCheckPoint() from the startup process, is older than 5891c7a8ed8f, still it seems to me that pgstats relies on some areas of the code that don't make sense on HEAD (see locking mentioned at the top of the write routine for example). The logic gets straight-forward to think about as restart points and checkpoints always run from the checkpointer, implying that pgstat_write_statsfile() is already called only from the postmaster in single-user mode or the checkpointer itself, at shutdown. Attached is a patch set, with the one being the actual feature, with some stuff prior to that: - 0001 adds the redo LSN to the pgstats file flushed. - 0002 adds an assertion in pgstat_write_statsfile(), to check from where it is called. - 0003 with more debugging. - 0004 is the meat of the thread. I am adding that to the next CF. Thoughts and comments are welcome. Thanks, -- Michael
From a39ffddc1c525fa4e39fd08657c6dae95a20043f Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 18 Jun 2024 11:00:36 +0900 Subject: [PATCH 1/4] Add redo LSN to pgstats file This is used in the startup process to check that the file we are loading is the one referring to the latest checkpoint. Bump PGSTAT_FILE_FORMAT_ID. --- src/include/pgstat.h | 5 +++-- src/backend/access/transam/xlog.c | 2 +- src/backend/utils/activity/pgstat.c | 26 +++++++++++++++++++------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2136239710..dbc3430b18 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -11,6 +11,7 @@ #ifndef PGSTAT_H #define PGSTAT_H +#include "access/xlogdefs.h" #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ @@ -235,7 +236,7 @@ typedef struct PgStat_TableXactStatus * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAC +#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAD typedef struct PgStat_ArchiverStats { @@ -466,7 +467,7 @@ extern Size StatsShmemSize(void); extern void StatsShmemInit(void); /* Functions called during server startup / shutdown */ -extern void pgstat_restore_stats(void); +extern void pgstat_restore_stats(XLogRecPtr redo); extern void pgstat_discard_stats(void); extern void pgstat_before_server_shutdown(int code, Datum arg); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 330e058c5f..96b458da42 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5641,7 +5641,7 @@ StartupXLOG(void) if (didCrash) pgstat_discard_stats(); else - pgstat_restore_stats(); + pgstat_restore_stats(checkPoint.redo); lastFullPageWrites = checkPoint.fullPageWrites; diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index dcc2ad8d95..824f742bde 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -94,6 +94,7 @@ #include <unistd.h> #include "access/xact.h" +#include "access/xlog.h" #include "lib/dshash.h" #include "pgstat.h" #include "port/atomics.h" @@ -162,8 +163,8 @@ typedef struct PgStat_SnapshotEntry * ---------- */ -static void pgstat_write_statsfile(void); -static void pgstat_read_statsfile(void); +static void pgstat_write_statsfile(XLogRecPtr redo); +static void pgstat_read_statsfile(XLogRecPtr redo); static void pgstat_reset_after_failure(void); @@ -404,9 +405,9 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { * Should only be called by the startup process or in single user mode. */ void -pgstat_restore_stats(void) +pgstat_restore_stats(XLogRecPtr redo) { - pgstat_read_statsfile(); + pgstat_read_statsfile(redo); } /* @@ -482,7 +483,7 @@ pgstat_before_server_shutdown(int code, Datum arg) if (code == 0) { pgStatLocal.shmem->is_shutdown = true; - pgstat_write_statsfile(); + pgstat_write_statsfile(GetRedoRecPtr()); } } @@ -1305,7 +1306,7 @@ write_chunk(FILE *fpout, void *ptr, size_t len) * stats so locking is not required. */ static void -pgstat_write_statsfile(void) +pgstat_write_statsfile(XLogRecPtr redo) { FILE *fpout; int32 format_id; @@ -1340,6 +1341,9 @@ pgstat_write_statsfile(void) format_id = PGSTAT_FILE_FORMAT_ID; write_chunk_s(fpout, &format_id); + /* Write the redo LSN, used to cross check the file loaded */ + write_chunk_s(fpout, &redo); + /* * XXX: The following could now be generalized to just iterate over * pgstat_kind_infos instead of knowing about the different kinds of @@ -1480,13 +1484,14 @@ read_chunk(FILE *fpin, void *ptr, size_t len) * stats so locking is not required. */ static void -pgstat_read_statsfile(void) +pgstat_read_statsfile(XLogRecPtr redo) { FILE *fpin; int32 format_id; bool found; const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; PgStat_ShmemControl *shmem = pgStatLocal.shmem; + XLogRecPtr file_redo; /* shouldn't be called from postmaster */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); @@ -1520,6 +1525,13 @@ pgstat_read_statsfile(void) format_id != PGSTAT_FILE_FORMAT_ID) goto error; + /* + * Read the redo LSN stored in the file. + */ + if (!read_chunk_s(fpin, &file_redo) || + file_redo != redo) + goto error; + /* * XXX: The following could now be generalized to just iterate over * pgstat_kind_infos instead of knowing about the different kinds of -- 2.45.1
From 9b252f56d1cad3e12001829bbb61c51cc742e9e0 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 18 Jun 2024 10:59:31 +0900 Subject: [PATCH 2/4] Add assertion in pgstat_write_statsfile() This routine can currently only be called from the postmaster in single-user mode or the checkpointer, so make sure that this is always the case. --- src/backend/utils/activity/pgstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 824f742bde..9cdd986582 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -1317,6 +1317,9 @@ pgstat_write_statsfile(XLogRecPtr redo) pgstat_assert_is_up(); + /* should be called only by the checkpointer or single user mode */ + Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER); + /* we're shutting down, so it's ok to just override this */ pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; -- 2.45.1
From ed7fe49cf147b09def2b78554a989d592e02fe07 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 18 Jun 2024 11:10:19 +0900 Subject: [PATCH 3/4] Add some DEBUG2 information about the redo LSN of the stats file This is useful for.. Debugging. How surprising. --- src/backend/utils/activity/pgstat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 9cdd986582..855dec9e52 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -1323,7 +1323,8 @@ pgstat_write_statsfile(XLogRecPtr redo) /* we're shutting down, so it's ok to just override this */ pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; - elog(DEBUG2, "writing stats file \"%s\"", statfile); + elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile, + LSN_FORMAT_ARGS(redo)); /* * Open the statistics temp file to write out the current values. @@ -1499,7 +1500,8 @@ pgstat_read_statsfile(XLogRecPtr redo) /* shouldn't be called from postmaster */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); - elog(DEBUG2, "reading stats file \"%s\"", statfile); + elog(DEBUG2, "reading stats file \"%s\" with redo %X/%X", statfile, + LSN_FORMAT_ARGS(redo)); /* * Try to open the stats file. If it doesn't exist, the backends simply -- 2.45.1
From d206073d3cc6a0a5ed9228cdd3089c24d88f51f6 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 18 Jun 2024 14:49:33 +0900 Subject: [PATCH 4/4] Flush pgstats file during checkpoints The flushes are done for non-shutdown checkpoints and restart points, so as it is possible to keep some statistics around in the event of a crash. Keeping the before_shmem_exit() callback to flush the pgstats file in a shutdown sequence is a bit easier than doing so at checkpoint, as they may be stats to flush before we are completely done with the shutdown checkpoint. Keeping the callback also keeps the shutdown of single-user mode simpler, where the stats also need to be flushed. At the beginning of recovery, the stats file is loaded when the redo LSN it stores matches with the point we are replaying from. --- src/include/pgstat.h | 4 +- src/backend/access/transam/xlog.c | 27 ++++++++--- src/backend/utils/activity/pgstat.c | 62 ++++++------------------ src/test/recovery/t/029_stats_restart.pl | 22 +++++++-- 4 files changed, 54 insertions(+), 61 deletions(-) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index dbc3430b18..9860bb6a99 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -466,9 +466,9 @@ typedef struct PgStat_PendingWalStats extern Size StatsShmemSize(void); extern void StatsShmemInit(void); -/* Functions called during server startup / shutdown */ +/* Functions called during server startup / checkpoint / shutdown */ extern void pgstat_restore_stats(XLogRecPtr redo); -extern void pgstat_discard_stats(void); +extern void pgstat_flush_stats(XLogRecPtr redo); extern void pgstat_before_server_shutdown(int code, Datum arg); /* Functions for backend initialization */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 96b458da42..7ce304c227 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5390,7 +5390,6 @@ StartupXLOG(void) XLogCtlInsert *Insert; CheckPoint checkPoint; bool wasShutdown; - bool didCrash; bool haveTblspcMap; bool haveBackupLabel; XLogRecPtr EndOfLog; @@ -5510,10 +5509,7 @@ StartupXLOG(void) { RemoveTempXlogFiles(); SyncDataDirectory(); - didCrash = true; } - else - didCrash = false; /* * Prepare for WAL recovery if needed. @@ -5638,10 +5634,7 @@ StartupXLOG(void) * TODO: With a bit of extra work we could just start with a pgstat file * associated with the checkpoint redo location we're starting from. */ - if (didCrash) - pgstat_discard_stats(); - else - pgstat_restore_stats(checkPoint.redo); + pgstat_restore_stats(checkPoint.redo); lastFullPageWrites = checkPoint.fullPageWrites; @@ -7203,6 +7196,15 @@ CreateCheckPoint(int flags) */ END_CRIT_SECTION(); + /* + * The control file update is done, hence it is time to write some fresh + * statistics. Stats are flushed at shutdown by the checkpointer in a + * dedicated before_shmem_exit callback, combined with sanity checks + * related to the shutdown of pgstats. + */ + if (!shutdown) + pgstat_flush_stats(RedoRecPtr); + /* * WAL summaries end when the next XLOG_CHECKPOINT_REDO or * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point @@ -7671,6 +7673,15 @@ CreateRestartPoint(int flags) } LWLockRelease(ControlFileLock); + /* + * The control file update is done, hence it is time to write some fresh + * statistics. Stats are flushed at shutdown by the checkpointer in a + * dedicated before_shmem_exit callback, combined with sanity checks + * related to the shutdown of pgstats. + */ + if ((flags & CHECKPOINT_IS_SHUTDOWN) == 0) + pgstat_flush_stats(RedoRecPtr); + /* * Update the average distance between checkpoints/restartpoints if the * prior checkpoint exists. diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 855dec9e52..a0d891aefc 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -10,9 +10,10 @@ * statistics. * * Statistics are loaded from the filesystem during startup (by the startup - * process), unless preceded by a crash, in which case all stats are - * discarded. They are written out by the checkpointer process just before - * shutting down, except when shutting down in immediate mode. + * process) if the stats file has a redo LSN that matches with the . They + * are written out by the checkpointer during checkpoints and restart points, + * as well as before shutting down, except when shutting down in immediate + * mode. * * Fixed-numbered stats are stored in plain (non-dynamic) shared memory. * @@ -411,53 +412,19 @@ pgstat_restore_stats(XLogRecPtr redo) } /* - * Remove the stats file. This is currently used only if WAL recovery is - * needed after a crash. + * Write stats in memory to disk. * - * Should only be called by the startup process or in single user mode. + * This is called by the checkpointer or in single-user mode. */ void -pgstat_discard_stats(void) +pgstat_flush_stats(XLogRecPtr redo) { - int ret; - - /* NB: this needs to be done even in single user mode */ - - ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME); - if (ret != 0) - { - if (errno == ENOENT) - elog(DEBUG2, - "didn't need to unlink permanent stats file \"%s\" - didn't exist", - PGSTAT_STAT_PERMANENT_FILENAME); - else - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not unlink permanent statistics file \"%s\": %m", - PGSTAT_STAT_PERMANENT_FILENAME))); - } - else - { - ereport(DEBUG2, - (errcode_for_file_access(), - errmsg_internal("unlinked permanent statistics file \"%s\"", - PGSTAT_STAT_PERMANENT_FILENAME))); - } - - /* - * Reset stats contents. This will set reset timestamps of fixed-numbered - * stats to the current time (no variable stats exist). - */ - pgstat_reset_after_failure(); + pgstat_write_statsfile(redo); } /* * pgstat_before_server_shutdown() needs to be called by exactly one process - * during regular server shutdowns. Otherwise all stats will be lost. - * - * We currently only write out stats for proc_exit(0). We might want to change - * that at some point... But right now pgstat_discard_stats() would be called - * during the start after a disorderly shutdown, anyway. + * during regular server shutdowns. */ void pgstat_before_server_shutdown(int code, Datum arg) @@ -482,6 +449,9 @@ pgstat_before_server_shutdown(int code, Datum arg) */ if (code == 0) { + /* we're shutting down, so it's ok to just override this */ + pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; + pgStatLocal.shmem->is_shutdown = true; pgstat_write_statsfile(GetRedoRecPtr()); } @@ -1302,8 +1272,8 @@ write_chunk(FILE *fpout, void *ptr, size_t len) #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr)) /* - * This function is called in the last process that is accessing the shared - * stats so locking is not required. + * This function is called in the checkpointer or in single-user mode, + * so locking is not required. */ static void pgstat_write_statsfile(XLogRecPtr redo) @@ -1320,9 +1290,6 @@ pgstat_write_statsfile(XLogRecPtr redo) /* should be called only by the checkpointer or single user mode */ Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER); - /* we're shutting down, so it's ok to just override this */ - pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; - elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile, LSN_FORMAT_ARGS(redo)); @@ -1402,7 +1369,6 @@ pgstat_write_statsfile(XLogRecPtr redo) CHECK_FOR_INTERRUPTS(); /* we may have some "dropped" entries not yet removed, skip them */ - Assert(!ps->dropped); if (ps->dropped) continue; diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl index 93a7209f69..9956a6ab82 100644 --- a/src/test/recovery/t/029_stats_restart.pl +++ b/src/test/recovery/t/029_stats_restart.pl @@ -59,7 +59,7 @@ ok(-f "$og_stats", "origin stats file must exist"); copy($og_stats, $statsfile) or die "Copy failed: $!"; -## test discarding of stats file after crash etc +## test retention of stats file after crash etc $node->start; @@ -69,12 +69,28 @@ is(have_stats('function', $dboid, $funcoid), 't', "$sect: function stats do exist"); is(have_stats('relation', $dboid, $tableoid), 't', "$sect: relation stats do exist"); +# Flush the stats to disk. +$node->psql('postgres', 'checkpoint'); $node->stop('immediate'); -ok(!-f "$og_stats", "no stats file should exist after immediate shutdown"); +ok(-f "$og_stats", "stats file should exist after immediate shutdown"); -# copy the old stats back to test we discard stats after crash restart +# Start once, there should be stats restored from the previous checkpoint. +$node->start; + +$sect = "crashrecovery"; +is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); +is(have_stats('function', $dboid, $funcoid), + 't', "$sect: function stats do exist"); +is(have_stats('relation', $dboid, $tableoid), + 't', "$sect: relation stats do exist"); + +$node->stop('immediate'); + +# Copy the old stats back, and test that we discard stats after crash restart +# because these don't match with the redo LSN stored in the stats file +# generated by the previous checkpoint. copy($statsfile, $og_stats) or die "Copy failed: $!"; $node->start; -- 2.45.1
signature.asc
Description: PGP signature