Hello, The attached patch implements the following TODO item:
Track number of WAL files ready to be archived in pg_stat_archiver. However, it will track the total number of all files ready to be archived, not only WAL files. Please let me know what you think about it. Regards. -- Julien Rouhaud http://dalibo.com - http://dalibo.org
*** a/doc/src/sgml/monitoring.sgml --- b/doc/src/sgml/monitoring.sgml *************** *** 728,733 **** postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser --- 728,738 ---- <entry>Time of the last failed archival operation</entry> </row> <row> + <entry><structfield>ready_count</></entry> + <entry><type>bigint</type></entry> + <entry>Number of files waiting to be archived</entry> + </row> + <row> <entry><structfield>stats_reset</></entry> <entry><type>timestamp with time zone</type></entry> <entry>Time at which these statistics were last reset</entry> *** a/src/backend/access/transam/xlogarchive.c --- b/src/backend/access/transam/xlogarchive.c *************** *** 24,29 **** --- 24,30 ---- #include "access/xlog_internal.h" #include "miscadmin.h" #include "postmaster/startup.h" + #include "pgstat.h" #include "replication/walsender.h" #include "storage/fd.h" #include "storage/ipc.h" *************** *** 539,544 **** XLogArchiveNotify(const char *xlog) --- 540,548 ---- /* Notify archiver that it's got something to do */ if (IsUnderPostmaster) SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER); + + /* Tell the collector about a new file waiting to be archived */ + pgstat_send_archiver(xlog, ARCH_READY); } /* *** a/src/backend/catalog/system_views.sql --- b/src/backend/catalog/system_views.sql *************** *** 697,702 **** CREATE VIEW pg_stat_archiver AS --- 697,703 ---- s.failed_count, s.last_failed_wal, s.last_failed_time, + s.ready_count, s.stats_reset FROM pg_stat_get_archiver() s; *** a/src/backend/postmaster/pgarch.c --- b/src/backend/postmaster/pgarch.c *************** *** 491,497 **** pgarch_ArchiverCopyLoop(void) * Tell the collector about the WAL file that we successfully * archived */ ! pgstat_send_archiver(xlog, false); break; /* out of inner retry loop */ } --- 491,497 ---- * Tell the collector about the WAL file that we successfully * archived */ ! 
pgstat_send_archiver(xlog, ARCH_SUCCESS); break; /* out of inner retry loop */ } *************** *** 501,507 **** pgarch_ArchiverCopyLoop(void) * Tell the collector about the WAL file that we failed to * archive */ ! pgstat_send_archiver(xlog, true); if (++failures >= NUM_ARCHIVE_RETRIES) { --- 501,507 ---- * Tell the collector about the WAL file that we failed to * archive */ ! pgstat_send_archiver(xlog, ARCH_FAIL); if (++failures >= NUM_ARCHIVE_RETRIES) { *** a/src/backend/postmaster/pgstat.c --- b/src/backend/postmaster/pgstat.c *************** *** 36,41 **** --- 36,42 ---- #include "access/transam.h" #include "access/twophase_rmgr.h" #include "access/xact.h" + #include "access/xlog_internal.h" #include "catalog/pg_database.h" #include "catalog/pg_proc.h" #include "lib/ilist.h" *************** *** 3084,3094 **** pgstat_send(void *msg, int len) * pgstat_send_archiver() - * * Tell the collector about the WAL file that we successfully ! * archived or failed to archive. * ---------- */ void ! pgstat_send_archiver(const char *xlog, bool failed) { PgStat_MsgArchiver msg; --- 3085,3096 ---- * pgstat_send_archiver() - * * Tell the collector about the WAL file that we successfully ! * archived or failed to archive, or the new file waiting ! * to be archived. * ---------- */ void ! pgstat_send_archiver(const char *xlog, ArchiverReason reason) { PgStat_MsgArchiver msg; *************** *** 3096,3102 **** pgstat_send_archiver(const char *xlog, bool failed) * Prepare and send the message */ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER); ! msg.m_failed = failed; strncpy(msg.m_xlog, xlog, sizeof(msg.m_xlog)); msg.m_timestamp = GetCurrentTimestamp(); pgstat_send(&msg, sizeof(msg)); --- 3098,3104 ---- * Prepare and send the message */ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER); ! 
msg.m_reason = reason; strncpy(msg.m_xlog, xlog, sizeof(msg.m_xlog)); msg.m_timestamp = GetCurrentTimestamp(); pgstat_send(&msg, sizeof(msg)); *************** *** 3921,3927 **** pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) /* * Try to open the stats file. If it doesn't exist, the backends simply * return zero for anything and the collector simply starts from scratch ! * with empty counters. * * ENOENT is a possibility if the stats collector is not running or has * not yet written the stats file the first time. Any other failure --- 3923,3930 ---- /* * Try to open the stats file. If it doesn't exist, the backends simply * return zero for anything and the collector simply starts from scratch ! * with empty counters, except for the .ready files count which should ! * always give the real number of files. * * ENOENT is a possibility if the stats collector is not running or has * not yet written the stats file the first time. Any other failure *************** *** 3934,3939 **** pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) --- 3937,3970 ---- (errcode_for_file_access(), errmsg("could not open statistics file \"%s\": %m", statfile))); + + /* Initialize the archive ready counter */ + char XLogArchiveStatusDir[MAXPGPATH]; + DIR *rldir; + struct dirent *rlde; + + snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); + rldir = AllocateDir(XLogArchiveStatusDir); + if (rldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open archive status directory \"%s\": %m", + XLogArchiveStatusDir))); + + while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) + { + int basenamelen = (int) strlen(rlde->d_name) - 6; + + if (basenamelen >= MIN_XFN_CHARS && + basenamelen <= MAX_XFN_CHARS && + strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen && + strcmp(rlde->d_name + basenamelen, ".ready") == 0) + { + ++archiverStats.ready_count; + } + } + FreeDir(rldir); + return dbhash; } *************** *** 4842,4849 
**** pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len) } else if (msg->m_resettarget == RESET_ARCHIVER) { ! /* Reset the archiver statistics for the cluster. */ memset(&archiverStats, 0, sizeof(archiverStats)); archiverStats.stat_reset_timestamp = GetCurrentTimestamp(); } --- 4873,4887 ---- } else if (msg->m_resettarget == RESET_ARCHIVER) { ! PgStat_Counter ready_count; ! /* ! * Reset the archiver statistics for the cluster. ! * We must keep the ready_count value as it should ! * always reflect the real count. ! */ ! ready_count = archiverStats.ready_count; memset(&archiverStats, 0, sizeof(archiverStats)); + archiverStats.ready_count = ready_count; archiverStats.stat_reset_timestamp = GetCurrentTimestamp(); } *************** *** 4984,5004 **** pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len) static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len) { ! if (msg->m_failed) ! { ! /* Failed archival attempt */ ! ++archiverStats.failed_count; ! memcpy(archiverStats.last_failed_wal, msg->m_xlog, ! sizeof(archiverStats.last_failed_wal)); ! archiverStats.last_failed_timestamp = msg->m_timestamp; ! } ! else { ! /* Successful archival operation */ ! ++archiverStats.archived_count; ! memcpy(archiverStats.last_archived_wal, msg->m_xlog, ! sizeof(archiverStats.last_archived_wal)); ! archiverStats.last_archived_timestamp = msg->m_timestamp; } } --- 5022,5048 ---- static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len) { ! switch (msg->m_reason) { ! case ARCH_FAIL: ! /* Failed archival attempt */ ! ++archiverStats.failed_count; ! memcpy(archiverStats.last_failed_wal, msg->m_xlog, ! sizeof(archiverStats.last_failed_wal)); ! archiverStats.last_failed_timestamp = msg->m_timestamp; ! break; ! case ARCH_SUCCESS: ! /* Successful archival operation */ ! ++archiverStats.archived_count; ! memcpy(archiverStats.last_archived_wal, msg->m_xlog, ! sizeof(archiverStats.last_archived_wal)); ! 
archiverStats.last_archived_timestamp = msg->m_timestamp; ! --archiverStats.ready_count; ! break; ! case ARCH_READY: ! /* New file waiting to be archived */ ! ++archiverStats.ready_count; ! break; } } *** a/src/backend/utils/adt/pgstatfuncs.c --- b/src/backend/utils/adt/pgstatfuncs.c *************** *** 1746,1752 **** pg_stat_get_archiver(PG_FUNCTION_ARGS) MemSet(nulls, 0, sizeof(nulls)); /* Initialise attributes information in the tuple descriptor */ ! tupdesc = CreateTemplateTupleDesc(7, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal", --- 1746,1752 ---- MemSet(nulls, 0, sizeof(nulls)); /* Initialise attributes information in the tuple descriptor */ ! tupdesc = CreateTemplateTupleDesc(8, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal", *************** *** 1759,1765 **** pg_stat_get_archiver(PG_FUNCTION_ARGS) TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time", TIMESTAMPTZOID, -1, 0); ! TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); --- 1759,1767 ---- TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time", TIMESTAMPTZOID, -1, 0); ! TupleDescInitEntry(tupdesc, (AttrNumber) 7, "ready_count", ! INT8OID, -1, 0); ! TupleDescInitEntry(tupdesc, (AttrNumber) 8, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); *************** *** 1790,1799 **** pg_stat_get_archiver(PG_FUNCTION_ARGS) else values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp); if (archiver_stats->stat_reset_timestamp == 0) ! nulls[6] = true; else ! 
values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum( --- 1792,1802 ---- else values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp); + values[6] = Int64GetDatum(archiver_stats->ready_count); if (archiver_stats->stat_reset_timestamp == 0) ! nulls[7] = true; else ! values[7] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum( *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** *** 2751,2757 **** DATA(insert OID = 2844 ( pg_stat_get_db_blk_read_time PGNSP PGUID 12 1 0 0 0 f DESCR("statistics: block read time, in msec"); DATA(insert OID = 2845 ( pg_stat_get_db_blk_write_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 701 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_blk_write_time _null_ _null_ _null_ )); DESCR("statistics: block write time, in msec"); ! DATA(insert OID = 3195 ( pg_stat_get_archiver PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,1184}" "{o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ )); DESCR("statistics: information about WAL archiver"); DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ )); DESCR("statistics: number of timed checkpoints started by the bgwriter"); --- 2751,2757 ---- DESCR("statistics: block read time, in msec"); DATA(insert OID = 2845 ( pg_stat_get_db_blk_write_time PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 701 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_blk_write_time _null_ _null_ _null_ )); DESCR("statistics: block write time, in msec"); ! 
DATA(insert OID = 3195 ( pg_stat_get_archiver PGNSP PGUID 12 1 0 0 0 f f f f f f s 0 0 2249 "" "{20,25,1184,20,25,1184,20,1184}" "{o,o,o,o,o,o,o,o}" "{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,ready_count,stats_reset}" _null_ pg_stat_get_archiver _null_ _null_ _null_ )); DESCR("statistics: information about WAL archiver"); DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ )); DESCR("statistics: number of timed checkpoints started by the bgwriter"); *** a/src/include/pgstat.h --- b/src/include/pgstat.h *************** *** 376,382 **** typedef struct PgStat_MsgAnalyze typedef struct PgStat_MsgArchiver { PgStat_MsgHdr m_hdr; ! bool m_failed; /* Failed attempt */ char m_xlog[MAX_XFN_CHARS + 1]; TimestampTz m_timestamp; } PgStat_MsgArchiver; --- 376,382 ---- typedef struct PgStat_MsgArchiver { PgStat_MsgHdr m_hdr; ! 
int m_reason; char m_xlog[MAX_XFN_CHARS + 1]; TimestampTz m_timestamp; } PgStat_MsgArchiver; *************** *** 651,656 **** typedef struct PgStat_ArchiverStats --- 651,657 ---- char last_failed_wal[MAX_XFN_CHARS + 1]; /* WAL file involved in * last failure */ TimestampTz last_failed_timestamp; /* last archival failure time */ + PgStat_Counter ready_count; /* Number of files waiting to be archived */ TimestampTz stat_reset_timestamp; } PgStat_ArchiverStats; *************** *** 690,695 **** typedef enum BackendState --- 691,707 ---- } BackendState; /* ---------- + * Archiver reason + * ---------- + */ + typedef enum ArchiverReason + { + ARCH_SUCCESS, + ARCH_FAIL, + ARCH_READY, + } ArchiverReason; + + /* ---------- * Shared-memory data structures * ---------- */ *************** *** 934,940 **** extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len); ! extern void pgstat_send_archiver(const char *xlog, bool failed); extern void pgstat_send_bgwriter(void); /* ---------- --- 946,952 ---- extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len); ! extern void pgstat_send_archiver(const char *xlog, ArchiverReason reason); extern void pgstat_send_bgwriter(void); /* ---------- *** a/src/test/regress/expected/rules.out --- b/src/test/regress/expected/rules.out *************** *** 1659,1666 **** pg_stat_archiver| SELECT s.archived_count, s.failed_count, s.last_failed_wal, s.last_failed_time, s.stats_reset ! 
FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, stats_reset); pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_checkpoint_write_time() AS checkpoint_write_time, --- 1659,1667 ---- s.failed_count, s.last_failed_wal, s.last_failed_time, + s.ready_count, s.stats_reset ! FROM pg_stat_get_archiver() s(archived_count, last_archived_wal, last_archived_time, failed_count, last_failed_wal, last_failed_time, ready_count, stats_reset); pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_checkpoint_write_time() AS checkpoint_write_time,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers