Hi,

On Mon, 23 Dec 2019 15:38:16 +0100
Jehan-Guillaume de Rorthais <j...@dalibo.com> wrote:

[...]

> My idea would be to return a row from pg_stat_get_wal_receiver() as soon as
> a wal receiver has been replicating during the uptime of the standby, no
> matter if there's one currently working or not. If no wal receiver is
> active, the "pid" field would be NULL and the "status" field would report,
> e.g., "inactive". All other fields would report their last known values, as
> they are kept in the shared memory WalRcv struct.
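To illustrate the intended behavior, once the wal receiver has exited one
could expect something like the following (hypothetical psql output; the
exact "status" label and the values shown depend on the last state kept in
the WalRcv struct):

  postgres=# SELECT pid, status, received_tli FROM pg_stat_get_wal_receiver();
   pid | status  | received_tli
  -----+---------+--------------
       | stopped |            2
  (1 row)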
Please find attached a patch implementing the above proposal.

Regards,
From 5641d8c5d46968873d8b8e1d3c1c0de10551741e Mon Sep 17 00:00:00 2001
From: Jehan-Guillaume de Rorthais <j...@dalibo.com>
Date: Tue, 31 Dec 2019 18:29:13 +0100
Subject: [PATCH] Always expose available stats from wal receiver

Make the admin function pg_stat_get_wal_receiver() return the data
available from WalRcv in shared memory, whatever the state of the wal
receiver process.

This allows supervision or HA tools to gather various physical
replication stats even when the wal receiver is stopped, for example
the latest timeline the wal receiver was receiving before shutting
down.

The behavior of the pg_stat_wal_receiver view has been kept to avoid
regressions: it returns no row when the wal receiver is shut down.
---
 src/backend/replication/walreceiver.c      | 14 +++++---------
 src/test/recovery/t/004_timeline_switch.pl | 12 +++++++++++-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index a4de8a9cd8..1207f145b8 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -1354,13 +1354,6 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	strlcpy(conninfo, (char *) WalRcv->conninfo, sizeof(conninfo));
 	SpinLockRelease(&WalRcv->mutex);
 
-	/*
-	 * No WAL receiver (or not ready yet), just return a tuple with NULL
-	 * values
-	 */
-	if (pid == 0 || !ready_to_display)
-		PG_RETURN_NULL();
-
 	/* determine result type */
 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
@@ -1369,7 +1362,10 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	nulls = palloc0(sizeof(bool) * tupdesc->natts);
 
 	/* Fetch values */
-	values[0] = Int32GetDatum(pid);
+	if (pid == 0)
+		nulls[0] = true;
+	else
+		values[0] = Int32GetDatum(pid);
 
 	if (!is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS))
 	{
@@ -1422,7 +1418,7 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 		nulls[12] = true;
 	else
 		values[12] = Int32GetDatum(sender_port);
-	if (*conninfo == '\0')
+	if (*conninfo == '\0' || !ready_to_display)
 		nulls[13] = true;
 	else
 		values[13] = CStringGetTextDatum(conninfo);
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index 7e952d3667..cdcdd2d981 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -6,7 +6,7 @@ use warnings;
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 2;
+use Test::More tests => 4;
 
 $ENV{PGDATABASE} = 'postgres';
 
@@ -37,6 +37,11 @@ $node_master->safe_psql('postgres',
 $node_master->wait_for_catchup($node_standby_1, 'replay',
 	$node_master->lsn('write'));
 
+# Check received timeline from pg_stat_get_wal_receiver() on standby 1
+my $node_standby_1_lsn = $node_standby_1->safe_psql('postgres',
+	'SELECT received_tli FROM pg_stat_get_wal_receiver()');
+is($node_standby_1_lsn, 1, 'check received timeline on standby 1');
+
 # Stop and remove master
 $node_master->teardown_node;
 
@@ -66,3 +71,8 @@ $node_standby_1->wait_for_catchup($node_standby_2, 'replay',
 my $result = $node_standby_2->safe_psql('postgres',
 	"SELECT count(*) FROM tab_int");
 is($result, qq(2000), 'check content of standby 2');
+
+# Check received timeline from pg_stat_get_wal_receiver() on standby 2
+my $node_standby_2_lsn = $node_standby_2->safe_psql('postgres',
+	'SELECT received_tli FROM pg_stat_get_wal_receiver()');
+is($node_standby_2_lsn, 2, 'check received timeline on standby 2');
-- 
2.20.1