From bf90fe696e3384885f8a993ab03821ab62c80e72 Mon Sep 17 00:00:00 2001
From: Ajin Cherian <itsajin@gmail.com>
Date: Mon, 28 Jul 2025 08:00:23 -0400
Subject: [PATCH v5] Fix a deadlock during ALTER SUBSCRIPTION... DROP
 PUBLICATION

When user drops a publication from a subscription, this will result in a
publication refresh on the subscriber which will try and drop any pending origins.
Meanwhile the apply worker could also be trying to cleanup origins. There could be
a deadlock if the order of locking of SubscriptionRelationId, SubscriptionRelRelationId and
ReplicationOriginRelationId are not aligned between functions
process_syncing_tables_for_apply() and AlterSubscription_refresh().

The fix is to get locks on SubscriptionRelationId, SubscriptionRelRelationId and
ReplicationOriginRelationId in that order when dropping tracking origins.
---
 src/backend/catalog/pg_subscription.c       | 22 ++++++++++++++++++++--
 src/backend/replication/logical/tablesync.c | 22 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
index add51ca..657d6ff 100644
--- a/src/backend/catalog/pg_subscription.c
+++ b/src/backend/catalog/pg_subscription.c
@@ -331,10 +331,28 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
 	bool		nulls[Natts_pg_subscription_rel];
 	Datum		values[Natts_pg_subscription_rel];
 	bool		replaces[Natts_pg_subscription_rel];
+	bool		sub_rel_locked, sub_locked;
+	LOCKTAG		tag;
 
-	LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock);
 
-	rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
+	/*
+	 * Check if we already hold locks for SubscriptionRelRelationId
+	 * and SubscriptionRelationId. If we do, we don't need to take
+	 * them again.
+	 */
+	sub_rel_locked = CheckRelationOidLockedByMe(SubscriptionRelRelationId,
+												RowExclusiveLock, true);
+
+	SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0);
+	sub_locked = LockHeldByMe(&tag, AccessShareLock);
+
+	if (sub_rel_locked && sub_locked)
+		rel = table_open(SubscriptionRelRelationId, NoLock);
+	else
+	{
+		LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock);
+		rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
+	}
 
 	/* Try finding existing mapping. */
 	tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP,
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index e6159ac..6d5024b 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -379,6 +379,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 	static HTAB *last_start_times = NULL;
 	ListCell   *lc;
 	bool		started_tx = false;
+	Relation	rel = NULL;
 
 	Assert(!IsTransactionState());
 
@@ -470,7 +471,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 				 * refresh for the subscription where we remove the table
 				 * state and its origin and by this time the origin might be
 				 * already removed. So passing missing_ok = true.
+				 *
+				 * Lock SubscriptionRelationId with AccessShareLock and
+				 * take RowExclusiveLock on SubscriptionRelRelationId to
+				 * keep the same locking order as refresh from an
+				 * user issued DDL command to prevent any deadlocks.
 				 */
+				LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
+								 0, AccessShareLock);
+				if (!rel)
+					rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
+
 				ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid,
 												  rstate->relid,
 												  originname,
@@ -533,7 +544,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 						 * This is required to avoid any undetected deadlocks
 						 * due to any existing lock as deadlock detector won't
 						 * be able to detect the waits on the latch.
+						 *
+						 * Also close any tables prior to the commit.
 						 */
+						if (rel)
+						{
+							table_close(rel, NoLock);
+							rel = NULL;
+						}
 						CommitTransactionCommand();
 						pgstat_report_stat(false);
 					}
@@ -593,6 +611,10 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 		}
 	}
 
+	/* Close table if opened */
+	if (rel)
+		table_close(rel, NoLock);
+
 	if (started_tx)
 	{
 		CommitTransactionCommand();
-- 
1.8.3.1

