Hi,

On 2022-10-31 16:21:06 +0530, Bharath Rupireddy wrote:
> BTW, I've seen a sporadic crash (SEGV) with the patch in bg writer
> with the same set up [1], I'm not sure if it's really because of the
> patch. I'm unable to reproduce it now and unfortunately I didn't
> capture further details when it occurred.

That's likely because the prototype patch I submitted in this thread missed
updating LWLockUpdateVar().

Updated patch attached.

Greetings,

Andres Freund
>From fbdd1345936d0e9909061fb6101831ade657b1b0 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Thu, 27 Oct 2022 10:18:13 -0700
Subject: [PATCH v3] lwlock: fix quadratic behaviour with very long wait lists

Author: Andres Freund <and...@anarazel.de>
Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostg...@gmail.com>
Discussion: https://postgr.es/m/20221027165914.2hofzp4cvutj6...@awork3.anarazel.de
Discussion: https://postgr.es/m/CALj2ACXktNbG=k8xi7psqboftzozavhaxjatvc14iyalu4m...@mail.gmail.com
Backpatch:
---
 src/include/storage/lwlock.h          |  8 +++++
 src/include/storage/proc.h            |  2 +-
 src/backend/access/transam/twophase.c |  2 +-
 src/backend/storage/lmgr/lwlock.c     | 50 +++++++++++++++------------
 src/backend/storage/lmgr/proc.c       |  4 +--
 5 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index ca4eca76f41..a494cb598ff 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -23,6 +23,14 @@
 
 struct PGPROC;
 
+/* what state of the wait process is a backend in */
+typedef enum LWLockWaitState
+{
+	LW_WS_NOT_WAITING, /* not currently waiting / woken up */
+	LW_WS_WAITING, /* currently waiting */
+	LW_WS_PENDING_WAKEUP, /* removed from waitlist, but not yet signalled */
+} LWLockWaitState;
+
 /*
  * Code outside of lwlock.c should not manipulate the contents of this
  * structure directly, but we have to declare it here to allow LWLocks to be
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 8d096fdeeb1..aa13e1d66e8 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -217,7 +217,7 @@ struct PGPROC
 	bool		recoveryConflictPending;
 
 	/* Info about LWLock the process is currently waiting for, if any. */
-	bool		lwWaiting;		/* true if waiting for an LW lock */
+	uint8		lwWaiting;		/* see LWLockWaitState */
 	uint8		lwWaitMode;		/* lwlock mode being waited for */
 	proclist_node lwWaitLink;	/* position in LW lock wait list */
 
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 803d169f578..5017f4451eb 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -485,7 +485,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
 	proc->roleId = owner;
 	proc->tempNamespaceId = InvalidOid;
 	proc->isBackgroundWorker = false;
-	proc->lwWaiting = false;
+	proc->lwWaiting = LW_WS_NOT_WAITING;
 	proc->lwWaitMode = 0;
 	proc->waitLock = NULL;
 	proc->waitProcLock = NULL;
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 0fc0cf6ebbd..a8a439ad516 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -987,6 +987,14 @@ LWLockWakeup(LWLock *lock)
 			wokeup_somebody = true;
 		}
 
+		/*
+		 * Signal that the process isn't on the wait list anymore. This allows
+		 * LWLockDequeueSelf() to remove itself of the waitlist with a
+		 * proclist_delete(), rather than having to check if it has been
+		 * removed from the list.
+		 */
+		waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
+
 		/*
 		 * Once we've woken up an exclusive lock, there's no point in waking
 		 * up anybody else.
@@ -1044,7 +1052,7 @@ LWLockWakeup(LWLock *lock)
 		 * another lock.
 		 */
 		pg_write_barrier();
-		waiter->lwWaiting = false;
+		waiter->lwWaiting = LW_WS_NOT_WAITING;
 		PGSemaphoreUnlock(waiter->sem);
 	}
 }
@@ -1065,7 +1073,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 	if (MyProc == NULL)
 		elog(PANIC, "cannot wait without a PGPROC structure");
 
-	if (MyProc->lwWaiting)
+	if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
 		elog(PANIC, "queueing for lock while waiting on another one");
 
 	LWLockWaitListLock(lock);
@@ -1073,7 +1081,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 	/* setting the flag is protected by the spinlock */
 	pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
 
-	MyProc->lwWaiting = true;
+	MyProc->lwWaiting = LW_WS_WAITING;
 	MyProc->lwWaitMode = mode;
 
 	/* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
@@ -1100,8 +1108,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 static void
 LWLockDequeueSelf(LWLock *lock)
 {
-	bool		found = false;
-	proclist_mutable_iter iter;
+	bool		on_waitlist;
 
 #ifdef LWLOCK_STATS
 	lwlock_stats *lwstats;
@@ -1114,18 +1121,13 @@ LWLockDequeueSelf(LWLock *lock)
 	LWLockWaitListLock(lock);
 
 	/*
-	 * Can't just remove ourselves from the list, but we need to iterate over
-	 * all entries as somebody else could have dequeued us.
+	 * Remove ourselves from the waitlist, unless we've already been
+	 * removed. The removal happens with the wait list lock held, so there's
+	 * no race in this check.
 	 */
-	proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
-	{
-		if (iter.cur == MyProc->pgprocno)
-		{
-			found = true;
-			proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
-			break;
-		}
-	}
+	on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
+	if (on_waitlist)
+		proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
 
 	if (proclist_is_empty(&lock->waiters) &&
 		(pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
@@ -1137,8 +1139,8 @@ LWLockDequeueSelf(LWLock *lock)
 	LWLockWaitListUnlock(lock);
 
 	/* clear waiting state again, nice for debugging */
-	if (found)
-		MyProc->lwWaiting = false;
+	if (on_waitlist)
+		MyProc->lwWaiting = LW_WS_NOT_WAITING;
 	else
 	{
 		int			extraWaits = 0;
@@ -1162,7 +1164,7 @@ LWLockDequeueSelf(LWLock *lock)
 		for (;;)
 		{
 			PGSemaphoreLock(MyProc->sem);
-			if (!MyProc->lwWaiting)
+			if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
 				break;
 			extraWaits++;
 		}
@@ -1313,7 +1315,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		for (;;)
 		{
 			PGSemaphoreLock(proc->sem);
-			if (!proc->lwWaiting)
+			if (proc->lwWaiting == LW_WS_NOT_WAITING)
 				break;
 			extraWaits++;
 		}
@@ -1478,7 +1480,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 			for (;;)
 			{
 				PGSemaphoreLock(proc->sem);
-				if (!proc->lwWaiting)
+				if (proc->lwWaiting == LW_WS_NOT_WAITING)
 					break;
 				extraWaits++;
 			}
@@ -1694,7 +1696,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
 		for (;;)
 		{
 			PGSemaphoreLock(proc->sem);
-			if (!proc->lwWaiting)
+			if (proc->lwWaiting == LW_WS_NOT_WAITING)
 				break;
 			extraWaits++;
 		}
@@ -1772,6 +1774,8 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 
 		proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
 		proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
+
+		waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
 	}
 
 	/* We are done updating shared state of the lock itself. */
@@ -1787,7 +1791,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 		proclist_delete(&wakeup, iter.cur, lwWaitLink);
 		/* check comment in LWLockWakeup() about this barrier */
 		pg_write_barrier();
-		waiter->lwWaiting = false;
+		waiter->lwWaiting = LW_WS_NOT_WAITING;
 		PGSemaphoreUnlock(waiter->sem);
 	}
 }
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 13fa07b0ff7..01af5ab4a2e 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -397,7 +397,7 @@ InitProcess(void)
 	/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
 	if (IsAutoVacuumWorkerProcess())
 		MyProc->statusFlags |= PROC_IS_AUTOVACUUM;
-	MyProc->lwWaiting = false;
+	MyProc->lwWaiting = LW_WS_NOT_WAITING;
 	MyProc->lwWaitMode = 0;
 	MyProc->waitLock = NULL;
 	MyProc->waitProcLock = NULL;
@@ -579,7 +579,7 @@ InitAuxiliaryProcess(void)
 	MyProc->isBackgroundWorker = IsBackgroundWorker;
 	MyProc->delayChkptFlags = 0;
 	MyProc->statusFlags = 0;
-	MyProc->lwWaiting = false;
+	MyProc->lwWaiting = LW_WS_NOT_WAITING;
 	MyProc->lwWaitMode = 0;
 	MyProc->waitLock = NULL;
 	MyProc->waitProcLock = NULL;
-- 
2.38.0

Reply via email to