On Mon, Jul 29, 2024 at 04:46:17AM +0000, Bertrand Drouvot wrote: > Thanks! 0001 attached is > v4-0001-Revert-Test-that-vacuum-removes-tuples-older-than.patch > so I guess you did not attached the right one.
I did attach the right set of patches, please ignore 0001 entirely: the patch series is made of three patches, beginning with 0002 :) > Looking at 0002: > > if (!read_chunk(fpin, ptr, info->shared_data_len)) > + { > + elog(WARNING, "could not read data of stats kind %d for entry > of type %c", > + kind, t); > > Nit: what about to include the "info->shared_data_len" value in the WARNING? Good idea, so added. > if (!read_chunk_s(fpin, &name)) > + { > + elog(WARNING, "could not read name of stats kind %d for entry > of type %c", > + kind, t); > goto error; > + } > if (!pgstat_is_kind_valid(kind)) > + { > + elog(WARNING, "invalid stats kind %d for entry of type %c", > + kind, t); > goto error; > + } > > Shouldn't we swap those 2 tests so that we check that the kind is valid right > after this one? Hmm. We could, but this order is not buggy either. So I've let it as-is for now, just adding the WARNINGs. By the way, I have noticed an extra path where a WARNING would not be logged while playing with corrupted pgstats files: when the entry type itself is incorrect. I have added an extra elog() in this case, and applied 0001. Err.. 0002, sorry ;) > Looking at 0003: LGTM > Looking at 0004: LGTM Thanks. Attached are the two remaining patches, for now. I'm planning to get back to these in a few days. -- Michael
From c0c2a3211bea705b49185a63975c86589b46871a Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 23 Jul 2024 12:40:08 +0900 Subject: [PATCH v5 1/2] Add redo LSN to pgstats file This is used in the startup process to check that the file we are loading is the one referring to the latest checkpoint. Bump PGSTAT_FILE_FORMAT_ID. --- src/include/pgstat.h | 5 +++-- src/backend/access/transam/xlog.c | 2 +- src/backend/utils/activity/pgstat.c | 35 +++++++++++++++++++++++------ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 6b99bb8aad..043d39a565 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -11,6 +11,7 @@ #ifndef PGSTAT_H #define PGSTAT_H +#include "access/xlogdefs.h" #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ @@ -235,7 +236,7 @@ typedef struct PgStat_TableXactStatus * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAD +#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAE typedef struct PgStat_ArchiverStats { @@ -466,7 +467,7 @@ extern Size StatsShmemSize(void); extern void StatsShmemInit(void); /* Functions called during server startup / shutdown */ -extern void pgstat_restore_stats(void); +extern void pgstat_restore_stats(XLogRecPtr redo); extern void pgstat_discard_stats(void); extern void pgstat_before_server_shutdown(int code, Datum arg); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f86f4b5c4b..b7bb4f9a31 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5652,7 +5652,7 @@ StartupXLOG(void) if (didCrash) pgstat_discard_stats(); else - pgstat_restore_stats(); + pgstat_restore_stats(checkPoint.redo); lastFullPageWrites = checkPoint.fullPageWrites; diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 228cdf73f7..65ae488e2a 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -94,6 +94,7 @@ #include <unistd.h> #include "access/xact.h" +#include "access/xlog.h" #include "lib/dshash.h" #include "pgstat.h" #include "port/atomics.h" @@ -171,8 +172,8 @@ typedef struct PgStat_SnapshotEntry * ---------- */ -static void pgstat_write_statsfile(void); -static void pgstat_read_statsfile(void); +static void pgstat_write_statsfile(XLogRecPtr redo); +static void pgstat_read_statsfile(XLogRecPtr redo); static void pgstat_reset_after_failure(void); @@ -448,9 +449,9 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { * Should only be called by the startup process or in single user mode. */ void -pgstat_restore_stats(void) +pgstat_restore_stats(XLogRecPtr redo) { - pgstat_read_statsfile(); + pgstat_read_statsfile(redo); } /* @@ -526,7 +527,7 @@ pgstat_before_server_shutdown(int code, Datum arg) if (code == 0) { pgStatLocal.shmem->is_shutdown = true; - pgstat_write_statsfile(); + pgstat_write_statsfile(GetRedoRecPtr()); } } @@ -1349,7 +1350,7 @@ write_chunk(FILE *fpout, void *ptr, size_t len) * stats so locking is not required. */ static void -pgstat_write_statsfile(void) +pgstat_write_statsfile(XLogRecPtr redo) { FILE *fpout; int32 format_id; @@ -1387,6 +1388,9 @@ pgstat_write_statsfile(void) format_id = PGSTAT_FILE_FORMAT_ID; write_chunk_s(fpout, &format_id); + /* Write the redo LSN, used to cross check the file read */ + write_chunk_s(fpout, &redo); + /* Write various stats structs for fixed number of objects */ for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++) { @@ -1501,13 +1505,14 @@ read_chunk(FILE *fpin, void *ptr, size_t len) * stats so locking is not required. */ static void -pgstat_read_statsfile(void) +pgstat_read_statsfile(XLogRecPtr redo) { FILE *fpin; int32 format_id; bool found; const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; PgStat_ShmemControl *shmem = pgStatLocal.shmem; + XLogRecPtr file_redo; /* shouldn't be called from postmaster */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); @@ -1550,6 +1555,22 @@ pgstat_read_statsfile(void) goto error; } + /* + * Read the redo LSN stored in the file. + */ + if (!read_chunk_s(fpin, &file_redo)) + { + elog(WARNING, "could not read redo LSN"); + goto error; + } + + if (file_redo != redo) + { + elog(WARNING, "found incorrect redo LSN %X/%X (expected %X/%X)", + LSN_FORMAT_ARGS(file_redo), LSN_FORMAT_ARGS(redo)); + goto error; + } + /* * We found an existing statistics file. Read it and put all the stats * data into place. -- 2.45.2
From d9dd1206b2895fe96c0766244cd65a1b9861e783 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Wed, 17 Jul 2024 12:42:43 +0900 Subject: [PATCH v5 2/2] Flush pgstats file during checkpoints The flushes are done for non-shutdown checkpoints and restart points, so as it is possible to keep some statistics around in the event of a crash. Keeping the before_shmem_exit() callback to flush the pgstats file in a shutdown sequence is a bit easier than doing so at checkpoint, as they may be stats to flush before we are completely done with the shutdown checkpoint. Keeping the callback also keeps the shutdown of single-user mode simpler, where the stats also need to be flushed. At the beginning of recovery, the stats file is loaded when the redo LSN it stores matches with the point we are replaying from. The file is not removed anymore after being read, to ensure that there is still a source of stats to feed on should the system crash until the next checkpoint that would update the stats. --- src/include/pgstat.h | 4 +- src/backend/access/transam/xlog.c | 30 +++++--- src/backend/utils/activity/pgstat.c | 68 +++++-------------- src/test/recovery/t/029_stats_restart.pl | 26 +++++-- .../recovery/t/030_stats_cleanup_replica.pl | 2 +- 5 files changed, 59 insertions(+), 71 deletions(-) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 043d39a565..f780b56cc3 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -466,9 +466,9 @@ typedef struct PgStat_PendingWalStats extern Size StatsShmemSize(void); extern void StatsShmemInit(void); -/* Functions called during server startup / shutdown */ +/* Functions called during server startup / checkpoint / shutdown */ extern void pgstat_restore_stats(XLogRecPtr redo); -extern void pgstat_discard_stats(void); +extern void pgstat_flush_stats(XLogRecPtr redo); extern void pgstat_before_server_shutdown(int code, Datum arg); /* Functions for backend initialization */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b7bb4f9a31..40422a3f3f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5401,7 +5401,6 @@ StartupXLOG(void) XLogCtlInsert *Insert; CheckPoint checkPoint; bool wasShutdown; - bool didCrash; bool haveTblspcMap; bool haveBackupLabel; XLogRecPtr EndOfLog; @@ -5521,10 +5520,7 @@ StartupXLOG(void) { RemoveTempXlogFiles(); SyncDataDirectory(); - didCrash = true; } - else - didCrash = false; /* * Prepare for WAL recovery if needed. @@ -5645,14 +5641,8 @@ StartupXLOG(void) * * NB: Restoring replication slot stats relies on slot state to have * already been restored from disk. - * - * TODO: With a bit of extra work we could just start with a pgstat file - * associated with the checkpoint redo location we're starting from. */ - if (didCrash) - pgstat_discard_stats(); - else - pgstat_restore_stats(checkPoint.redo); + pgstat_restore_stats(checkPoint.redo); lastFullPageWrites = checkPoint.fullPageWrites; @@ -7244,6 +7234,15 @@ CreateCheckPoint(int flags) */ END_CRIT_SECTION(); + /* + * The control file update is done, hence it is time to write some fresh + * statistics. Stats are flushed at shutdown by the checkpointer in a + * dedicated before_shmem_exit callback, combined with sanity checks + * related to the shutdown of pgstats. + */ + if (!shutdown) + pgstat_flush_stats(RedoRecPtr); + /* * WAL summaries end when the next XLOG_CHECKPOINT_REDO or * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point @@ -7713,6 +7712,15 @@ CreateRestartPoint(int flags) } LWLockRelease(ControlFileLock); + /* + * The control file update is done, hence it is time to write some fresh + * statistics. Stats are flushed at shutdown by the checkpointer in a + * dedicated before_shmem_exit callback, combined with sanity checks + * related to the shutdown of pgstats. + */ + if ((flags & CHECKPOINT_IS_SHUTDOWN) == 0) + pgstat_flush_stats(RedoRecPtr); + /* * Update the average distance between checkpoints/restartpoints if the * prior checkpoint exists. diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 65ae488e2a..3f94d436a2 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -10,9 +10,10 @@ * statistics. * * Statistics are loaded from the filesystem during startup (by the startup - * process), unless preceded by a crash, in which case all stats are - * discarded. They are written out by the checkpointer process just before - * shutting down, except when shutting down in immediate mode. + * process) if the stats file has a redo LSN that matches with the one stored + * in the control file. They are written out by the checkpointer during + * checkpoints and restart points, as well as before shutting down, except + * when shutting down in immediate mode. * * Fixed-numbered stats are stored in plain (non-dynamic) shared memory. * @@ -455,53 +456,19 @@ pgstat_restore_stats(XLogRecPtr redo) } /* - * Remove the stats file. This is currently used only if WAL recovery is - * needed after a crash. + * Write stats in memory to disk. * - * Should only be called by the startup process or in single user mode. + * This is called by the checkpointer or in single-user mode. */ void -pgstat_discard_stats(void) +pgstat_flush_stats(XLogRecPtr redo) { - int ret; - - /* NB: this needs to be done even in single user mode */ - - ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME); - if (ret != 0) - { - if (errno == ENOENT) - elog(DEBUG2, - "didn't need to unlink permanent stats file \"%s\" - didn't exist", - PGSTAT_STAT_PERMANENT_FILENAME); - else - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not unlink permanent statistics file \"%s\": %m", - PGSTAT_STAT_PERMANENT_FILENAME))); - } - else - { - ereport(DEBUG2, - (errcode_for_file_access(), - errmsg_internal("unlinked permanent statistics file \"%s\"", - PGSTAT_STAT_PERMANENT_FILENAME))); - } - - /* - * Reset stats contents. This will set reset timestamps of fixed-numbered - * stats to the current time (no variable stats exist). - */ - pgstat_reset_after_failure(); + pgstat_write_statsfile(redo); } /* * pgstat_before_server_shutdown() needs to be called by exactly one process - * during regular server shutdowns. Otherwise all stats will be lost. - * - * We currently only write out stats for proc_exit(0). We might want to change - * that at some point... But right now pgstat_discard_stats() would be called - * during the start after a disorderly shutdown, anyway. + * during regular server shutdowns. */ void pgstat_before_server_shutdown(int code, Datum arg) @@ -526,6 +493,9 @@ pgstat_before_server_shutdown(int code, Datum arg) */ if (code == 0) { + /* we're shutting down, so it's ok to just override this */ + pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; + pgStatLocal.shmem->is_shutdown = true; pgstat_write_statsfile(GetRedoRecPtr()); } @@ -1346,8 +1316,8 @@ write_chunk(FILE *fpout, void *ptr, size_t len) #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr)) /* - * This function is called in the last process that is accessing the shared - * stats so locking is not required. + * This function is called in the checkpointer or in single-user mode, + * so locking is not required. */ static void pgstat_write_statsfile(XLogRecPtr redo) @@ -1364,10 +1334,8 @@ pgstat_write_statsfile(XLogRecPtr redo) /* should be called only by the checkpointer or single user mode */ Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER); - /* we're shutting down, so it's ok to just override this */ - pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; - - elog(DEBUG2, "writing stats file \"%s\"", statfile); + elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile, + LSN_FORMAT_ARGS(redo)); /* * Open the statistics temp file to write out the current values. @@ -1422,7 +1390,6 @@ pgstat_write_statsfile(XLogRecPtr redo) CHECK_FOR_INTERRUPTS(); /* we may have some "dropped" entries not yet removed, skip them */ - Assert(!ps->dropped); if (ps->dropped) continue; @@ -1751,9 +1718,6 @@ pgstat_read_statsfile(XLogRecPtr redo) done: FreeFile(fpin); - elog(DEBUG2, "removing permanent stats file \"%s\"", statfile); - unlink(statfile); - return; error: diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl index 93a7209f69..73d059b9f9 100644 --- a/src/test/recovery/t/029_stats_restart.pl +++ b/src/test/recovery/t/029_stats_restart.pl @@ -59,7 +59,7 @@ ok(-f "$og_stats", "origin stats file must exist"); copy($og_stats, $statsfile) or die "Copy failed: $!"; -## test discarding of stats file after crash etc +## test retention of stats file after crash etc $node->start; @@ -69,12 +69,28 @@ is(have_stats('function', $dboid, $funcoid), 't', "$sect: function stats do exist"); is(have_stats('relation', $dboid, $tableoid), 't', "$sect: relation stats do exist"); +# Flush the stats to disk. +$node->psql('postgres', 'checkpoint'); $node->stop('immediate'); -ok(!-f "$og_stats", "no stats file should exist after immediate shutdown"); +ok(-f "$og_stats", "stats file should exist after immediate shutdown"); -# copy the old stats back to test we discard stats after crash restart +# Start once, there should be stats restored from the previous checkpoint. +$node->start; + +$sect = "crashrecovery"; +is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); +is(have_stats('function', $dboid, $funcoid), + 't', "$sect: function stats do exist"); +is(have_stats('relation', $dboid, $tableoid), + 't', "$sect: relation stats do exist"); + +$node->stop('immediate'); + +# Copy the old stats back, and test that we discard stats after crash restart +# because these don't match with the redo LSN stored in the stats file +# generated by the previous checkpoint. copy($statsfile, $og_stats) or die "Copy failed: $!"; $node->start; @@ -274,9 +290,9 @@ my $wal_restart_immediate = wal_stats(); cmp_ok( $wal_reset_restart->{reset}, - 'lt', + 'eq', $wal_restart_immediate->{reset}, - "$sect: reset timestamp is new"); + "$sect: reset timestamp is the same"); $node->stop; done_testing(); diff --git a/src/test/recovery/t/030_stats_cleanup_replica.pl b/src/test/recovery/t/030_stats_cleanup_replica.pl index 74b516cc7c..5735921c99 100644 --- a/src/test/recovery/t/030_stats_cleanup_replica.pl +++ b/src/test/recovery/t/030_stats_cleanup_replica.pl @@ -115,7 +115,7 @@ $node_standby->start(); $sect = "post immediate restart"; test_standby_func_tab_stats_status('postgres', - $dboid, $tableoid, $funcoid, 'f'); + $dboid, $tableoid, $funcoid, 't'); done_testing(); -- 2.45.2
signature.asc
Description: PGP signature