Hello!
On 06.12.2022 21:44, Andres Freund wrote:
Hi,
On 2022-09-19 01:29:21 +0300, Anton A. Melnikov wrote:
Corrected patch is attached (v2-0001-Fix-burst-checkpoint_req-growth.patch).
This patch doesn't pass the main regression tests tests successfully:
https://cirrus-ci.com/task/5502700019253248
diff -U3 /tmp/cirrus-ci-build/src/test/regress/expected/rules.out
/tmp/cirrus-ci-build/build/testrun/regress/regress/results/rules.out
--- /tmp/cirrus-ci-build/src/test/regress/expected/rules.out 2022-12-06
05:49:53.687424000 +0000
+++ /tmp/cirrus-ci-build/build/testrun/regress/regress/results/rules.out
2022-12-06 05:53:04.642690000 +0000
@@ -1816,6 +1816,9 @@
FROM pg_stat_get_archiver() s(archived_count, last_archived_wal,
last_archived_time, failed_count, last_failed_wal, last_failed_time,
stats_reset);
pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS
checkpoints_timed,
pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req,
+ pg_stat_get_bgwriter_timed_restartpoints() AS restartpoints_timed,
+ pg_stat_get_bgwriter_requested_restartpoints() AS restartpoints_req,
+ pg_stat_get_bgwriter_performed_restartpoints() AS restartpoints_done,
pg_stat_get_checkpoint_write_time() AS checkpoint_write_time,
pg_stat_get_checkpoint_sync_time() AS checkpoint_sync_time,
pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint,
Greetings,
Andres Freund
Thank you for pointing!
I didn't think that the patch tester would apply both patch variants
simultaneously,
assuming that these are two different possible solutions of the problem.
But it's even good, let it check both at once!
There was an error in the second variant (Add-restartpoint-stats), i forgot to
correct the rules.out.
So fixed the second variant and rebased the first one
(Fix-burst-checkpoint_req-growth)
to the current master.
With the best wishes,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
commit 6f3ab7b6b5562aaf68ccfdd6dde17c27c2d2869e
Author: Anton A. Melnikov <a.melni...@postgrespro.ru>
Date: Wed Dec 7 10:10:58 2022 +0300
Remove burst growth of the checkpoint_req on replica.
with correcttions according to comment: https://www.postgresql.org/message-id/20220907.163946.2214338139097492905.horikyota.ntt%40gmail.com
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a31fbbff78..02d86bf370 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8935,3 +8935,20 @@ SetWalWriterSleeping(bool sleeping)
XLogCtl->WalWriterSleeping = sleeping;
SpinLockRelease(&XLogCtl->info_lck);
}
+
+/*
+ * Check if a new checkpoint WAL record has been received since the
+ * last restartpoint. So it is possible to create a new one.
+ */
+bool RestartPointAvailable(void)
+{
+ bool result = false;
+
+ SpinLockAcquire(&XLogCtl->info_lck);
+ if (!XLogRecPtrIsInvalid(XLogCtl->lastCheckPointRecPtr)
+ && XLogCtl->lastCheckPoint.redo > XLogCtl->RedoRecPtr)
+ result = true;
+ SpinLockRelease(&XLogCtl->info_lck);
+
+ return result;
+}
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 97b882564f..a88c716197 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -3196,7 +3196,15 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
{
(void) GetRedoRecPtr();
if (XLogCheckpointNeeded(readSegNo))
- RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
+ {
+ /*
+ * If there is no new checkpoint WAL records since the
+ * last restartpoint the creation of new one
+ * will certainly fail, so skip it.
+ */
+ if (RestartPointAvailable())
+ RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
+ }
}
}
diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h
index f3398425d8..dcc2e64ca7 100644
--- a/src/include/access/xlogrecovery.h
+++ b/src/include/access/xlogrecovery.h
@@ -83,6 +83,7 @@ extern void InitWalRecovery(ControlFileData *ControlFile,
bool *wasShutdown_ptr, bool *haveBackupLabel_ptr,
bool *haveTblspcMap_ptr);
extern void PerformWalRecovery(void);
+extern bool RestartPointAvailable(void);
/*
* FinishWalRecovery() returns this. It contains information about the point
commit 9ec1f90bbe407da94f8e940084e8815b4f8e874b
Author: Anton A. Melnikov <a.melni...@postgrespro.ru>
Date: Wed Dec 7 10:49:19 2022 +0300
Add statistic about restartpoint into pg_stat_bgwriter
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 2d8104b090..658cafdc03 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1107,6 +1107,9 @@ CREATE VIEW pg_stat_bgwriter AS
SELECT
pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed,
pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req,
+ pg_stat_get_bgwriter_timed_restartpoints() AS restartpoints_timed,
+ pg_stat_get_bgwriter_requested_restartpoints() AS restartpoints_req,
+ pg_stat_get_bgwriter_performed_restartpoints() AS restartpoints_done,
pg_stat_get_checkpoint_write_time() AS checkpoint_write_time,
pg_stat_get_checkpoint_sync_time() AS checkpoint_sync_time,
pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint,
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 5fc076fc14..2296701ddf 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -350,6 +350,8 @@ CheckpointerMain(void)
pg_time_t now;
int elapsed_secs;
int cur_timeout;
+ bool chkpt_or_rstpt_requested = false;
+ bool chkpt_or_rstpt_timed = false;
/* Clear any already-pending wakeups */
ResetLatch(MyLatch);
@@ -368,7 +370,7 @@ CheckpointerMain(void)
if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags)
{
do_checkpoint = true;
- PendingCheckpointerStats.requested_checkpoints++;
+ chkpt_or_rstpt_requested = true;
}
/*
@@ -382,7 +384,7 @@ CheckpointerMain(void)
if (elapsed_secs >= CheckPointTimeout)
{
if (!do_checkpoint)
- PendingCheckpointerStats.timed_checkpoints++;
+ chkpt_or_rstpt_timed = true;
do_checkpoint = true;
flags |= CHECKPOINT_CAUSE_TIME;
}
@@ -418,6 +420,24 @@ CheckpointerMain(void)
if (flags & CHECKPOINT_END_OF_RECOVERY)
do_restartpoint = false;
+ if (chkpt_or_rstpt_requested)
+ {
+ chkpt_or_rstpt_requested = false;
+ if (do_restartpoint)
+ PendingCheckpointerStats.requested_restartpoints++;
+ else
+ PendingCheckpointerStats.requested_checkpoints++;
+ }
+
+ if (chkpt_or_rstpt_timed)
+ {
+ chkpt_or_rstpt_timed = false;
+ if (do_restartpoint)
+ PendingCheckpointerStats.timed_restartpoints++;
+ else
+ PendingCheckpointerStats.timed_checkpoints++;
+ }
+
/*
* We will warn if (a) too soon since last checkpoint (whatever
* caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
@@ -481,6 +501,9 @@ CheckpointerMain(void)
* checkpoints happen at a predictable spacing.
*/
last_checkpoint_time = now;
+
+ if (do_restartpoint)
+ PendingCheckpointerStats.performed_restartpoints++;
}
else
{
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index af8d513e7b..9de99a2cf1 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -49,6 +49,9 @@ pgstat_report_checkpointer(void)
#define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld
CHECKPOINTER_ACC(timed_checkpoints);
CHECKPOINTER_ACC(requested_checkpoints);
+ CHECKPOINTER_ACC(timed_restartpoints);
+ CHECKPOINTER_ACC(requested_restartpoints);
+ CHECKPOINTER_ACC(performed_restartpoints);
CHECKPOINTER_ACC(checkpoint_write_time);
CHECKPOINTER_ACC(checkpoint_sync_time);
CHECKPOINTER_ACC(buf_written_checkpoints);
@@ -112,6 +115,9 @@ pgstat_checkpointer_snapshot_cb(void)
#define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld;
CHECKPOINTER_COMP(timed_checkpoints);
CHECKPOINTER_COMP(requested_checkpoints);
+ CHECKPOINTER_COMP(timed_restartpoints);
+ CHECKPOINTER_COMP(requested_restartpoints);
+ CHECKPOINTER_COMP(performed_restartpoints);
CHECKPOINTER_COMP(checkpoint_write_time);
CHECKPOINTER_COMP(checkpoint_sync_time);
CHECKPOINTER_COMP(buf_written_checkpoints);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 25a159b5e5..4c6b98dcce 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1138,6 +1138,24 @@ pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->requested_checkpoints);
}
+Datum
+pg_stat_get_bgwriter_timed_restartpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->timed_restartpoints);
+}
+
+Datum
+pg_stat_get_bgwriter_requested_restartpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->requested_restartpoints);
+}
+
+Datum
+pg_stat_get_bgwriter_performed_restartpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->performed_restartpoints);
+}
+
Datum
pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f9301b2627..5d31ce40f7 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5637,6 +5637,21 @@
proname => 'pg_stat_get_bgwriter_requested_checkpoints', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_bgwriter_requested_checkpoints' },
+{ oid => '9938',
+ descr => 'statistics: number of timed restartpoints started by the bgwriter',
+ proname => 'pg_stat_get_bgwriter_timed_restartpoints', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => '',
+ prosrc => 'pg_stat_get_bgwriter_timed_restartpoints' },
+{ oid => '9939',
+ descr => 'statistics: number of backend requested restartpoints started by the bgwriter',
+ proname => 'pg_stat_get_bgwriter_requested_restartpoints', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => '',
+ prosrc => 'pg_stat_get_bgwriter_requested_restartpoints' },
+{ oid => '9940',
+ descr => 'statistics: number of backend performed restartpoints started by the bgwriter',
+ proname => 'pg_stat_get_bgwriter_performed_restartpoints', provolatile => 's',
+ proparallel => 'r', prorettype => 'int8', proargtypes => '',
+ prosrc => 'pg_stat_get_bgwriter_performed_restartpoints' },
{ oid => '2771',
descr => 'statistics: number of buffers written by the bgwriter during checkpoints',
proname => 'pg_stat_get_bgwriter_buf_written_checkpoints', provolatile => 's',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index a3df8d27c3..cfc384ec5b 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -269,6 +269,9 @@ typedef struct PgStat_CheckpointerStats
{
PgStat_Counter timed_checkpoints;
PgStat_Counter requested_checkpoints;
+ PgStat_Counter timed_restartpoints;
+ PgStat_Counter requested_restartpoints;
+ PgStat_Counter performed_restartpoints;
PgStat_Counter checkpoint_write_time; /* times in milliseconds */
PgStat_Counter checkpoint_sync_time;
PgStat_Counter buf_written_checkpoints;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index fb9f936d43..0809264ad5 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1816,6 +1816,9 @@ pg_stat_archiver| SELECT s.archived_count,
FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, stats_reset);
pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed,
pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req,
+ pg_stat_get_bgwriter_timed_restartpoints() AS restartpoints_timed,
+ pg_stat_get_bgwriter_requested_restartpoints() AS restartpoints_req,
+ pg_stat_get_bgwriter_performed_restartpoints() AS restartpoints_done,
pg_stat_get_checkpoint_write_time() AS checkpoint_write_time,
pg_stat_get_checkpoint_sync_time() AS checkpoint_sync_time,
pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint,