From 58403718a39b81aa107e29bd1791e158f19e5646 Mon Sep 17 00:00:00 2001
From: Fabrice Chapuis <fabrice.chapuis@zas.admin.ch>
Date: Mon, 28 Jul 2025 15:27:03 +0200
Subject: [PATCH v1] fix failover slot issue when doing a switchover

---
 src/backend/catalog/system_functions.sql   |  1 +
 src/backend/replication/logical/slotsync.c | 44 +++++++++++++++++-----
 src/backend/replication/slot.c             |  7 +++-
 src/backend/replication/slotfuncs.c        | 10 +++--
 src/backend/replication/walsender.c        |  4 +-
 src/include/catalog/pg_proc.dat            |  8 ++--
 src/include/replication/slot.h             |  7 +++-
 7 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index 566f308..6cd3175 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -480,6 +480,7 @@ CREATE OR REPLACE FUNCTION pg_create_logical_replication_slot(
     IN temporary boolean DEFAULT false,
     IN twophase boolean DEFAULT false,
     IN failover boolean DEFAULT false,
+    IN allow_overwrite boolean DEFAULT false,
     OUT slot_name name, OUT lsn pg_lsn)
 RETURNS RECORD
 LANGUAGE INTERNAL
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 656e66e..d6332cd 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -627,6 +627,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 	ReplicationSlot *slot;
 	XLogRecPtr	latestFlushPtr;
 	bool		slot_updated = false;
+	bool 		allow_overwrite = false;		
 
 	/*
 	 * Make sure that concerned WAL is received and flushed before syncing
@@ -649,24 +650,46 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 
 		return false;
 	}
-
-	/* Search for the named slot */
+	// Both local and remote slot have the same name
 	if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
 	{
-		bool		synced;
+		bool		synced;	
 
 		SpinLockAcquire(&slot->mutex);
 		synced = slot->data.synced;
+		allow_overwrite = slot->data.allow_overwrite;
 		SpinLockRelease(&slot->mutex);
-
-		/* User-created slot with the same name exists, raise ERROR. */
-		if (!synced)
-			ereport(ERROR,
+	
+		if (!synced){						
+			/* 
+			 * Check if we need to overwrite an existing
+			 * logical slot 
+			*/
+			if (allow_overwrite){								
+				/* 
+				 * Get rid of a replication slot that is no
+				 *longer wanted 
+				*/
+				ReplicationSlotDrop(remote_slot->name,true);
+
+				/* Get rid of a replication slot that is no longer wanted */
+				ereport(WARNING,
+					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					errmsg("slot \"%s\" already exists"
+                            " on the standby but it will be dropped because flag allow_overwrite is set to true",
+                             remote_slot->name));
+				
+				/* Going back to the main loop after droping the failover slot */
+				return false; 			
+			}
+			else
+				/* User-created slot with the same name exists, raise ERROR. */  
+				ereport(ERROR,
 					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 					errmsg("exiting from slot synchronization because same"
-						   " name slot \"%s\" already exists on the standby",
-						   remote_slot->name));
-
+							" name slot \"%s\" already exists on the standby",
+							remote_slot->name));			
+		}		
 		/*
 		 * The slot has been synchronized before.
 		 *
@@ -761,6 +784,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 		ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
 							  remote_slot->two_phase,
 							  remote_slot->failover,
+							  allow_overwrite,
 							  true);
 
 		/* For shorter lines. */
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 600b87f..d6bc5c6 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -323,7 +323,7 @@ ReplicationSlotValidateName(const char *name, int elevel)
 void
 ReplicationSlotCreate(const char *name, bool db_specific,
 					  ReplicationSlotPersistency persistency,
-					  bool two_phase, bool failover, bool synced)
+					  bool two_phase, bool failover, bool allow_overwrite, bool synced)
 {
 	ReplicationSlot *slot = NULL;
 	int			i;
@@ -413,6 +413,11 @@ ReplicationSlotCreate(const char *name, bool db_specific,
 	slot->data.two_phase_at = InvalidXLogRecPtr;
 	slot->data.failover = failover;
 	slot->data.synced = synced;
+	slot->data.allow_overwrite = allow_overwrite;
+
+	elog(LOG, "Logical replication slot %s created with option allow_overwrite to %s",
+     NameStr(slot->data.name),
+     slot->data.allow_overwrite ? "true" : "false");
 
 	/* and then data only present in shared memory */
 	slot->just_dirtied = false;
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 36cc2ed..5dd5f50 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -40,7 +40,7 @@ create_physical_replication_slot(char *name, bool immediately_reserve,
 
 	/* acquire replication slot, this will check for conflicting names */
 	ReplicationSlotCreate(name, false,
-						  temporary ? RS_TEMPORARY : RS_PERSISTENT, false,
+						  temporary ? RS_TEMPORARY : RS_PERSISTENT, false, false,
 						  false, false);
 
 	if (immediately_reserve)
@@ -116,7 +116,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
 static void
 create_logical_replication_slot(char *name, char *plugin,
 								bool temporary, bool two_phase,
-								bool failover,
+								bool failover, bool allow_overwrite,
 								XLogRecPtr restart_lsn,
 								bool find_startpoint)
 {
@@ -134,7 +134,7 @@ create_logical_replication_slot(char *name, char *plugin,
 	 */
 	ReplicationSlotCreate(name, true,
 						  temporary ? RS_TEMPORARY : RS_EPHEMERAL, two_phase,
-						  failover, false);
+						  failover, allow_overwrite, false);
 
 	/*
 	 * Create logical decoding context to find start point or, if we don't
@@ -173,6 +173,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
 	bool		temporary = PG_GETARG_BOOL(2);
 	bool		two_phase = PG_GETARG_BOOL(3);
 	bool		failover = PG_GETARG_BOOL(4);
+	bool		allow_overwrite = PG_GETARG_BOOL(5); 
 	Datum		result;
 	TupleDesc	tupdesc;
 	HeapTuple	tuple;
@@ -191,6 +192,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
 									temporary,
 									two_phase,
 									failover,
+									allow_overwrite,
 									InvalidXLogRecPtr,
 									true);
 
@@ -210,7 +212,6 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(result);
 }
 
-
 /*
  * SQL function for dropping a replication slot.
  */
@@ -726,6 +727,7 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
 										temporary,
 										false,
 										false,
+										false,
 										src_restart_lsn,
 										false);
 	}
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 9fa8beb..ef22695 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1198,7 +1198,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 	{
 		ReplicationSlotCreate(cmd->slotname, false,
 							  cmd->temporary ? RS_TEMPORARY : RS_PERSISTENT,
-							  false, false, false);
+							  false, false, false, false);
 
 		if (reserve_wal)
 		{
@@ -1229,7 +1229,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 		 */
 		ReplicationSlotCreate(cmd->slotname, true,
 							  cmd->temporary ? RS_TEMPORARY : RS_EPHEMERAL,
-							  two_phase, failover, false);
+							  two_phase, failover, false, false);
 
 		/*
 		 * Do options check early so that we can bail before calling the
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 62beb71..074805d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11480,10 +11480,10 @@
 { oid => '3786', descr => 'set up a logical replication slot',
   proname => 'pg_create_logical_replication_slot', provolatile => 'v',
   proparallel => 'u', prorettype => 'record',
-  proargtypes => 'name name bool bool bool',
-  proallargtypes => '{name,name,bool,bool,bool,name,pg_lsn}',
-  proargmodes => '{i,i,i,i,i,o,o}',
-  proargnames => '{slot_name,plugin,temporary,twophase,failover,slot_name,lsn}',
+  proargtypes => 'name name bool bool bool bool',
+  proallargtypes => '{name,name,bool,bool,bool,bool,name,pg_lsn}',
+  proargmodes => '{i,i,i,i,i,i,o,o}',
+  proargnames => '{slot_name,plugin,temporary,twophase,failover,allow_overwrite,slot_name,lsn}',
   prosrc => 'pg_create_logical_replication_slot' },
 { oid => '4222',
   descr => 'copy a logical replication slot, changing temporality and plugin',
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index eb0b93b..1fd6445 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -134,6 +134,11 @@ typedef struct ReplicationSlotPersistentData
 	 * for logical slots on the primary server.
 	 */
 	bool		failover;
+	/*
+	 * Allow Postgres to drop logical replication slot on standby server to ensure 
+	 * creation of new failover slot when sync_replication_slots is true.
+	 */
+	bool		allow_overwrite;	
 } ReplicationSlotPersistentData;
 
 /*
@@ -267,7 +272,7 @@ extern void ReplicationSlotsShmemInit(void);
 /* management of individual slots */
 extern void ReplicationSlotCreate(const char *name, bool db_specific,
 								  ReplicationSlotPersistency persistency,
-								  bool two_phase, bool failover,
+								  bool two_phase, bool failover, bool allow_overwrite,
 								  bool synced);
 extern void ReplicationSlotPersist(void);
 extern void ReplicationSlotDrop(const char *name, bool nowait);
-- 
2.47.3

