On Thu, Nov 2, 2023 at 7:19 AM Peter Smith <smithpb2...@gmail.com> wrote:
>
> But that's not quite compatible with what Alvaro [2] had written long
> back ("... the only acquisitions that would log messages are those in
> StartReplication and StartLogicalReplication.").
>
> In other words, ReplicationSlotAcquire/ReplicationSlotRelease is
> called by more places than you care to log from.

I refreshed my thoughts for this patch and I think it's enough if
walsenders alone log messages when slots become active and inactive.
How about something like the attached v11 patch?

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From eed7c96d9cab98a576728792c2cb111ae61f930f Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Sat, 4 Nov 2023 21:59:27 +0000
Subject: [PATCH v11] Emit messages when replication slots become active and
 inactive

These logs will be extremely useful on production servers to debug
and analyze inactive replication slot issues.
---
 doc/src/sgml/config.sgml            | 10 +++---
 src/backend/replication/slot.c      | 54 ++++++++++++++++++++++++++++-
 src/backend/replication/walsender.c | 10 +++---
 src/include/replication/slot.h      |  2 ++
 4 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index bd70ff2e4b..6d14a94cf4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7510,11 +7510,11 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
       </term>
       <listitem>
        <para>
-        Causes each replication command to be logged in the server log.
-        See <xref linkend="protocol-replication"/> for more information about
-        replication command. The default value is <literal>off</literal>.
-        Only superusers and users with the appropriate <literal>SET</literal>
-        privilege can change this setting.
+        Causes each replication command and related activity to be logged in
+        the server log. See <xref linkend="protocol-replication"/> for more
+        information about replication command. The default value is
+        <literal>off</literal>. Only superusers and users with the appropriate
+        <literal>SET</literal> privilege can change this setting.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 99823df3c7..dfb564b04f 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -181,7 +181,12 @@ ReplicationSlotShmemExit(int code, Datum arg)
 {
 	/* Make sure active replication slots are released */
 	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
+	{
+		if (am_walsender)
+			ReplicationSlotReleaseAndLog();
+		else
+			ReplicationSlotRelease();
+	}
 
 	/* Also cleanup all the temporary slots. */
 	ReplicationSlotCleanup();
@@ -539,6 +544,26 @@ retry:
 		pgstat_acquire_replslot(s);
 }
 
+/*
+ * A wrapper around ReplicationSlotAcquire() but logs a message when
+ * appropriate.
+ *
+ * This function is currently used only in walsender.
+ */
+void
+ReplicationSlotAcquireAndLog(const char *name, bool nowait)
+{
+	Assert(am_walsender);
+
+	ReplicationSlotAcquire(name, nowait);
+
+	ereport(log_replication_commands ? LOG : DEBUG1,
+			(errmsg("walsender process with PID %d acquired %s replication slot \"%s\"",
+					MyProcPid,
+					SlotIsPhysical(MyReplicationSlot) ? "physical" : "logical",
+					NameStr(MyReplicationSlot->data.name))));
+}
+
 /*
  * Release the replication slot that this backend considers to own.
  *
@@ -598,6 +623,33 @@ ReplicationSlotRelease(void)
 	LWLockRelease(ProcArrayLock);
 }
 
+/*
+ * A wrapper around ReplicationSlotAcquire() but logs a message when
+ * appropriate.
+ *
+ * This function is currently used only in walsender.
+ */
+void
+ReplicationSlotReleaseAndLog(void)
+{
+	char	   *slotname;
+	bool		is_physical;
+
+	Assert(am_walsender);
+
+	slotname = pstrdup(NameStr(MyReplicationSlot->data.name));
+	is_physical = SlotIsPhysical(MyReplicationSlot);
+
+	ReplicationSlotRelease();
+
+	ereport(log_replication_commands ? LOG : DEBUG1,
+			(errmsg("walsender process with PID %d released %s replication slot \"%s\"",
+					MyProcPid,
+					is_physical ? "physical" : "logical", slotname)));
+
+	pfree(slotname);
+}
+
 /*
  * Cleanup all temporary slots created in current session.
  */
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e250b0567e..2a1c5056c0 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -322,7 +322,7 @@ WalSndErrorCleanup(void)
 		wal_segment_close(xlogreader);
 
 	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
+		ReplicationSlotReleaseAndLog();
 
 	ReplicationSlotCleanup();
 
@@ -697,7 +697,7 @@ StartReplication(StartReplicationCmd *cmd)
 
 	if (cmd->slotname)
 	{
-		ReplicationSlotAcquire(cmd->slotname, true);
+		ReplicationSlotAcquireAndLog(cmd->slotname, true);
 		if (SlotIsLogical(MyReplicationSlot))
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -843,7 +843,7 @@ StartReplication(StartReplicationCmd *cmd)
 	}
 
 	if (cmd->slotname)
-		ReplicationSlotRelease();
+		ReplicationSlotReleaseAndLog();
 
 	/*
 	 * Copy is finished now. Send a single-row result set indicating the next
@@ -1261,7 +1261,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
 
 	Assert(!MyReplicationSlot);
 
-	ReplicationSlotAcquire(cmd->slotname, true);
+	ReplicationSlotAcquireAndLog(cmd->slotname, true);
 
 	/*
 	 * Force a disconnect, so that the decoding code doesn't need to care
@@ -1323,7 +1323,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
 	WalSndLoop(XLogSendLogical);
 
 	FreeDecodingContext(logical_decoding_ctx);
-	ReplicationSlotRelease();
+	ReplicationSlotReleaseAndLog();
 
 	replication_active = false;
 	if (got_STOPPING)
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index d3535eed58..3dc8699106 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -223,7 +223,9 @@ extern void ReplicationSlotPersist(void);
 extern void ReplicationSlotDrop(const char *name, bool nowait);
 
 extern void ReplicationSlotAcquire(const char *name, bool nowait);
+extern void ReplicationSlotAcquireAndLog(const char *name, bool nowait);
 extern void ReplicationSlotRelease(void);
+extern void ReplicationSlotReleaseAndLog(void);
 extern void ReplicationSlotCleanup(void);
 extern void ReplicationSlotSave(void);
 extern void ReplicationSlotMarkDirty(void);
-- 
2.34.1

Reply via email to