Hi, On 2022-10-31 16:21:06 +0530, Bharath Rupireddy wrote: > BTW, I've seen a sporadic crash (SEGV) with the patch in bg writer > with the same set up [1], I'm not sure if it's really because of the > patch. I'm unable to reproduce it now and unfortunately I didn't > capture further details when it occurred.
That's likely because the prototype patch I submitted in this thread missed updating LWLockUpdateVar(). Updated patch attached. Greetings, Andres Freund
>From fbdd1345936d0e9909061fb6101831ade657b1b0 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Thu, 27 Oct 2022 10:18:13 -0700 Subject: [PATCH v3] lwlock: fix quadratic behaviour with very long wait lists Author: Andres Freund <and...@anarazel.de> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostg...@gmail.com> Discussion: https://postgr.es/m/20221027165914.2hofzp4cvutj6...@awork3.anarazel.de Discussion: https://postgr.es/m/CALj2ACXktNbG=k8xi7psqboftzozavhaxjatvc14iyalu4m...@mail.gmail.com Backpatch: --- src/include/storage/lwlock.h | 8 +++++ src/include/storage/proc.h | 2 +- src/backend/access/transam/twophase.c | 2 +- src/backend/storage/lmgr/lwlock.c | 50 +++++++++++++++------------ src/backend/storage/lmgr/proc.c | 4 +-- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index ca4eca76f41..a494cb598ff 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -23,6 +23,14 @@ struct PGPROC; +/* what state of the wait process is a backend in */ +typedef enum LWLockWaitState +{ + LW_WS_NOT_WAITING, /* not currently waiting / woken up */ + LW_WS_WAITING, /* currently waiting */ + LW_WS_PENDING_WAKEUP, /* removed from waitlist, but not yet signalled */ +} LWLockWaitState; + /* * Code outside of lwlock.c should not manipulate the contents of this * structure directly, but we have to declare it here to allow LWLocks to be diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 8d096fdeeb1..aa13e1d66e8 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -217,7 +217,7 @@ struct PGPROC bool recoveryConflictPending; /* Info about LWLock the process is currently waiting for, if any. */ - bool lwWaiting; /* true if waiting for an LW lock */ + uint8 lwWaiting; /* see LWLockWaitState */ uint8 lwWaitMode; /* lwlock mode being waited for */ proclist_node lwWaitLink; /* position in LW lock wait list */ diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 803d169f578..5017f4451eb 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -485,7 +485,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->roleId = owner; proc->tempNamespaceId = InvalidOid; proc->isBackgroundWorker = false; - proc->lwWaiting = false; + proc->lwWaiting = LW_WS_NOT_WAITING; proc->lwWaitMode = 0; proc->waitLock = NULL; proc->waitProcLock = NULL; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 0fc0cf6ebbd..a8a439ad516 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -987,6 +987,14 @@ LWLockWakeup(LWLock *lock) wokeup_somebody = true; } + /* + * Signal that the process isn't on the wait list anymore. This allows + * LWLockDequeueSelf() to remove itself of the waitlist with a + * proclist_delete(), rather than having to check if it has been + * removed from the list. + */ + waiter->lwWaiting = LW_WS_PENDING_WAKEUP; + /* * Once we've woken up an exclusive lock, there's no point in waking * up anybody else. @@ -1044,7 +1052,7 @@ LWLockWakeup(LWLock *lock) * another lock. */ pg_write_barrier(); - waiter->lwWaiting = false; + waiter->lwWaiting = LW_WS_NOT_WAITING; PGSemaphoreUnlock(waiter->sem); } } @@ -1065,7 +1073,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) if (MyProc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); - if (MyProc->lwWaiting) + if (MyProc->lwWaiting != LW_WS_NOT_WAITING) elog(PANIC, "queueing for lock while waiting on another one"); LWLockWaitListLock(lock); @@ -1073,7 +1081,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) /* setting the flag is protected by the spinlock */ pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS); - MyProc->lwWaiting = true; + MyProc->lwWaiting = LW_WS_WAITING; MyProc->lwWaitMode = mode; /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */ @@ -1100,8 +1108,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) static void LWLockDequeueSelf(LWLock *lock) { - bool found = false; - proclist_mutable_iter iter; + bool on_waitlist; #ifdef LWLOCK_STATS lwlock_stats *lwstats; @@ -1114,18 +1121,13 @@ LWLockDequeueSelf(LWLock *lock) LWLockWaitListLock(lock); /* - * Can't just remove ourselves from the list, but we need to iterate over - * all entries as somebody else could have dequeued us. + * Remove ourselves from the waitlist, unless we've already been + * removed. The removal happens with the wait list lock held, so there's + * no race in this check. */ - proclist_foreach_modify(iter, &lock->waiters, lwWaitLink) - { - if (iter.cur == MyProc->pgprocno) - { - found = true; - proclist_delete(&lock->waiters, iter.cur, lwWaitLink); - break; - } - } + on_waitlist = MyProc->lwWaiting == LW_WS_WAITING; + if (on_waitlist) + proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink); if (proclist_is_empty(&lock->waiters) && (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0) @@ -1137,8 +1139,8 @@ LWLockDequeueSelf(LWLock *lock) LWLockWaitListUnlock(lock); /* clear waiting state again, nice for debugging */ - if (found) - MyProc->lwWaiting = false; + if (on_waitlist) + MyProc->lwWaiting = LW_WS_NOT_WAITING; else { int extraWaits = 0; @@ -1162,7 +1164,7 @@ LWLockDequeueSelf(LWLock *lock) for (;;) { PGSemaphoreLock(MyProc->sem); - if (!MyProc->lwWaiting) + if (MyProc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1313,7 +1315,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1478,7 +1480,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1694,7 +1696,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1772,6 +1774,8 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val) proclist_delete(&lock->waiters, iter.cur, lwWaitLink); proclist_push_tail(&wakeup, iter.cur, lwWaitLink); + + waiter->lwWaiting = LW_WS_PENDING_WAKEUP; } /* We are done updating shared state of the lock itself. */ @@ -1787,7 +1791,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val) proclist_delete(&wakeup, iter.cur, lwWaitLink); /* check comment in LWLockWakeup() about this barrier */ pg_write_barrier(); - waiter->lwWaiting = false; + waiter->lwWaiting = LW_WS_NOT_WAITING; PGSemaphoreUnlock(waiter->sem); } } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 13fa07b0ff7..01af5ab4a2e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -397,7 +397,7 @@ InitProcess(void) /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) MyProc->statusFlags |= PROC_IS_AUTOVACUUM; - MyProc->lwWaiting = false; + MyProc->lwWaiting = LW_WS_NOT_WAITING; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; @@ -579,7 +579,7 @@ InitAuxiliaryProcess(void) MyProc->isBackgroundWorker = IsBackgroundWorker; MyProc->delayChkptFlags = 0; MyProc->statusFlags = 0; - MyProc->lwWaiting = false; + MyProc->lwWaiting = LW_WS_NOT_WAITING; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; -- 2.38.0