On Thu, Nov 2, 2023 at 7:19 AM Peter Smith <smithpb2...@gmail.com> wrote: > > But that's not quite compatible with what Alvaro [2] had written long > back ("... the only acquisitions that would log messages are those in > StartReplication and StartLogicalReplication."). > > In other words, ReplicationSlotAcquire/ReplicationSlotRelease is > called by more places than you care to log from.
I refreshed my thoughts for this patch and I think it's enough if walsenders alone log messages when slots become active and inactive. How about something like the attached v11 patch? -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From eed7c96d9cab98a576728792c2cb111ae61f930f Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Sat, 4 Nov 2023 21:59:27 +0000 Subject: [PATCH v11] Emit messages when replication slots become active and inactive These logs will be extremely useful on production servers to debug and analyze inactive replication slot issues. --- doc/src/sgml/config.sgml | 10 +++--- src/backend/replication/slot.c | 54 ++++++++++++++++++++++++++++- src/backend/replication/walsender.c | 10 +++--- src/include/replication/slot.h | 2 ++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index bd70ff2e4b..6d14a94cf4 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7510,11 +7510,11 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' </term> <listitem> <para> - Causes each replication command to be logged in the server log. - See <xref linkend="protocol-replication"/> for more information about - replication command. The default value is <literal>off</literal>. - Only superusers and users with the appropriate <literal>SET</literal> - privilege can change this setting. + Causes each replication command and related activity to be logged in + the server log. See <xref linkend="protocol-replication"/> for more + information about replication command. The default value is + <literal>off</literal>. Only superusers and users with the appropriate + <literal>SET</literal> privilege can change this setting. </para> </listitem> </varlistentry> diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 99823df3c7..dfb564b04f 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -181,7 +181,12 @@ ReplicationSlotShmemExit(int code, Datum arg) { /* Make sure active replication slots are released */ if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + { + if (am_walsender) + ReplicationSlotReleaseAndLog(); + else + ReplicationSlotRelease(); + } /* Also cleanup all the temporary slots. */ ReplicationSlotCleanup(); @@ -539,6 +544,26 @@ retry: pgstat_acquire_replslot(s); } +/* + * A wrapper around ReplicationSlotAcquire() but logs a message when + * appropriate. + * + * This function is currently used only in walsender. + */ +void +ReplicationSlotAcquireAndLog(const char *name, bool nowait) +{ + Assert(am_walsender); + + ReplicationSlotAcquire(name, nowait); + + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("walsender process with PID %d acquired %s replication slot \"%s\"", + MyProcPid, + SlotIsPhysical(MyReplicationSlot) ? "physical" : "logical", + NameStr(MyReplicationSlot->data.name)))); +} + /* * Release the replication slot that this backend considers to own. * @@ -598,6 +623,33 @@ ReplicationSlotRelease(void) LWLockRelease(ProcArrayLock); } +/* + * A wrapper around ReplicationSlotAcquire() but logs a message when + * appropriate. + * + * This function is currently used only in walsender. + */ +void +ReplicationSlotReleaseAndLog(void) +{ + char *slotname; + bool is_physical; + + Assert(am_walsender); + + slotname = pstrdup(NameStr(MyReplicationSlot->data.name)); + is_physical = SlotIsPhysical(MyReplicationSlot); + + ReplicationSlotRelease(); + + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("walsender process with PID %d released %s replication slot \"%s\"", + MyProcPid, + is_physical ? "physical" : "logical", slotname))); + + pfree(slotname); +} + /* * Cleanup all temporary slots created in current session. */ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e250b0567e..2a1c5056c0 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -322,7 +322,7 @@ WalSndErrorCleanup(void) wal_segment_close(xlogreader); if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + ReplicationSlotReleaseAndLog(); ReplicationSlotCleanup(); @@ -697,7 +697,7 @@ StartReplication(StartReplicationCmd *cmd) if (cmd->slotname) { - ReplicationSlotAcquire(cmd->slotname, true); + ReplicationSlotAcquireAndLog(cmd->slotname, true); if (SlotIsLogical(MyReplicationSlot)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), @@ -843,7 +843,7 @@ StartReplication(StartReplicationCmd *cmd) } if (cmd->slotname) - ReplicationSlotRelease(); + ReplicationSlotReleaseAndLog(); /* * Copy is finished now. Send a single-row result set indicating the next @@ -1261,7 +1261,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) Assert(!MyReplicationSlot); - ReplicationSlotAcquire(cmd->slotname, true); + ReplicationSlotAcquireAndLog(cmd->slotname, true); /* * Force a disconnect, so that the decoding code doesn't need to care @@ -1323,7 +1323,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) WalSndLoop(XLogSendLogical); FreeDecodingContext(logical_decoding_ctx); - ReplicationSlotRelease(); + ReplicationSlotReleaseAndLog(); replication_active = false; if (got_STOPPING) diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index d3535eed58..3dc8699106 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -223,7 +223,9 @@ extern void ReplicationSlotPersist(void); extern void ReplicationSlotDrop(const char *name, bool nowait); extern void ReplicationSlotAcquire(const char *name, bool nowait); +extern void ReplicationSlotAcquireAndLog(const char *name, bool nowait); extern void ReplicationSlotRelease(void); +extern void ReplicationSlotReleaseAndLog(void); extern void ReplicationSlotCleanup(void); extern void ReplicationSlotSave(void); extern void ReplicationSlotMarkDirty(void); -- 2.34.1