On Fri, Feb 22, 2019 at 3:23 PM Magnus Hagander <mag...@hagander.net> wrote:
> > > On Fri, Feb 22, 2019 at 3:16 PM Julien Rouhaud <rjuju...@gmail.com> wrote: > >> On Fri, Feb 22, 2019 at 3:01 PM Magnus Hagander <mag...@hagander.net> >> wrote: >> > >> > PFA is a patch to do this. >> >> +void >> +pgstat_report_checksum_failure(void) >> +{ >> + PgStat_MsgDeadlock msg; >> >> I think that you meant PgStat_MsgChecksumFailure :) >> >> +/* ---------- >> + * pgstat_recv_checksum_failure() - >> + * >> + * Process a DEADLOCK message. >> + * ---------- >> >> same here >> >> Otherwise LGTM. >> > > Haha, dammit, that's embarrassing. You can probably guess where I > copy/pasted from :) > > And of course, then I forgot to attach the new file. -- Magnus Hagander Me: https://www.hagander.net/ <http://www.hagander.net/> Work: https://www.redpill-linpro.com/ <http://www.redpill-linpro.com/>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 0e73cdcdda..e2630fd368 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2509,6 +2509,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i <entry>Number of deadlocks detected in this database</entry> </row> <row> + <entry><structfield>checksum_failures</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Number of data page checksum failures detected in this database</entry> + </row> + <row> <entry><structfield>blk_read_time</structfield></entry> <entry><type>double precision</type></entry> <entry>Time spent reading data file blocks by backends in this database, diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 3e229c693c..7723f01327 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -823,6 +823,7 @@ CREATE VIEW pg_stat_database AS pg_stat_get_db_temp_files(D.oid) AS temp_files, pg_stat_get_db_temp_bytes(D.oid) AS temp_bytes, pg_stat_get_db_deadlocks(D.oid) AS deadlocks, + pg_stat_get_db_checksum_failures(D.oid) AS checksum_failures, pg_stat_get_db_blk_read_time(D.oid) AS blk_read_time, pg_stat_get_db_blk_write_time(D.oid) AS blk_write_time, pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 81c6499251..f09028d208 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -334,6 +334,7 @@ static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len); +static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile 
*msg, int len); /* ------------------------------------------------------------ @@ -1518,6 +1519,26 @@ pgstat_report_deadlock(void) pgstat_send(&msg, sizeof(msg)); } + +/* -------- + * pgstat_report_checksum_failure() - + * + * Tell the collector about a checksum failure. + * -------- + */ +void +pgstat_report_checksum_failure(void) +{ + PgStat_MsgChecksumFailure msg; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CHECKSUMFAILURE); + msg.m_databaseid = MyDatabaseId; + pgstat_send(&msg, sizeof(msg)); +} + /* -------- * pgstat_report_tempfile() - * @@ -4455,6 +4476,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_tempfile((PgStat_MsgTempFile *) &msg, len); break; + case PGSTAT_MTYPE_CHECKSUMFAILURE: + pgstat_recv_checksum_failure((PgStat_MsgChecksumFailure *) &msg, len); + break; + default: break; } @@ -4554,6 +4579,7 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry) dbentry->n_temp_files = 0; dbentry->n_temp_bytes = 0; dbentry->n_deadlocks = 0; + dbentry->n_checksum_failures = 0; dbentry->n_block_read_time = 0; dbentry->n_block_write_time = 0; @@ -6197,6 +6223,22 @@ pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len) } /* ---------- + * pgstat_recv_checksum_failure() - + * + * Process a CHECKSUMFAILURE message. + * ---------- + */ +static void +pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len) +{ + PgStat_StatDBEntry *dbentry; + + dbentry = pgstat_get_db_entry(msg->m_databaseid, true); + + dbentry->n_checksum_failures ++; +} + +/* ---------- * pgstat_recv_tempfile() - * * Process a TEMPFILE message. 
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 517220bc33..14bc61b8ad 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/itup.h" #include "access/xlog.h" +#include "pgstat.h" #include "storage/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -151,6 +152,8 @@ PageIsVerified(Page page, BlockNumber blkno) errmsg("page verification failed, calculated checksum %u but expected %u", checksum, p->pd_checksum))); + pgstat_report_checksum_failure(); + if (header_sane && ignore_checksum_failure) return true; } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 69f7265779..da1d685c08 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1498,6 +1498,21 @@ pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS) } Datum +pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_checksum_failures); + + PG_RETURN_INT64(result); +} + +Datum pg_stat_get_db_blk_read_time(PG_FUNCTION_ARGS) { Oid dbid = PG_GETARG_OID(0); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a4e173b484..911fad1af9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5233,6 +5233,10 @@ proname => 'pg_stat_get_db_deadlocks', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_deadlocks' }, +{ oid => '3425', descr => 'statistics: checksum failures detected in database', + proname => 'pg_stat_get_db_checksum_failures', provolatile => 's', proparallel => 'r', + prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_checksum_failures' }, { oid => 
'3074', descr => 'statistics: last reset for a database', proname => 'pg_stat_get_db_stat_reset_time', provolatile => 's', proparallel => 'r', prorettype => 'timestamptz', proargtypes => 'oid', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 88a75fb798..b9e6463f2b 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -64,7 +64,8 @@ typedef enum StatMsgType PGSTAT_MTYPE_FUNCPURGE, PGSTAT_MTYPE_RECOVERYCONFLICT, PGSTAT_MTYPE_TEMPFILE, - PGSTAT_MTYPE_DEADLOCK + PGSTAT_MTYPE_DEADLOCK, + PGSTAT_MTYPE_CHECKSUMFAILURE } StatMsgType; /* ---------- @@ -530,6 +531,17 @@ typedef struct PgStat_MsgDeadlock Oid m_databaseid; } PgStat_MsgDeadlock; +/* ---------- + * PgStat_MsgChecksumFailure Sent by the backend to tell the collector + * about checksum failures noticed. + * ---------- + */ +typedef struct PgStat_MsgChecksumFailure +{ + PgStat_MsgHdr m_hdr; + Oid m_databaseid; +} PgStat_MsgChecksumFailure; + /* ---------- * PgStat_Msg Union over all possible messages. @@ -593,6 +605,7 @@ typedef struct PgStat_StatDBEntry PgStat_Counter n_temp_files; PgStat_Counter n_temp_bytes; PgStat_Counter n_deadlocks; + PgStat_Counter n_checksum_failures; PgStat_Counter n_block_read_time; /* times in microseconds */ PgStat_Counter n_block_write_time; @@ -1200,6 +1213,7 @@ extern void pgstat_report_analyze(Relation rel, extern void pgstat_report_recovery_conflict(int reason); extern void pgstat_report_deadlock(void); +extern void pgstat_report_checksum_failure(void); extern void pgstat_initialize(void); extern void pgstat_bestart(void); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 98f417cb57..e0f2c543ef 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1817,6 +1817,7 @@ pg_stat_database| SELECT d.oid AS datid, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, + 
pg_stat_get_db_checksum_failures(d.oid) AS checksum_failures, pg_stat_get_db_blk_read_time(d.oid) AS blk_read_time, pg_stat_get_db_blk_write_time(d.oid) AS blk_write_time, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset