Robert Haas <robertmh...@gmail.com> writes: > On Wed, Jan 10, 2024 at 10:17 PM Andy Fan <zhihuifan1...@163.com> wrote: >> fixed in v2. > > Timing the spinlock wait seems like a separate patch from the new sanity > checks.
Yes, a separate patch would be better, so removed it from v4. > I suspect that the new sanity checks should only be armed in > assert-enabled builds. There are 2 changes in v4. a). Make sure every code is only armed in assert-enabled builds. Previously there was some counter++ in non assert-enabled build. b). Record the location of spin lock so that whenever the Assert failure, we know which spin lock it is. In our internal testing, that helps a lot. -- Best Regards Andy Fan
>From 1caa02079c8cf689b73503d2c79d9814b33082aa Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Mon, 15 Jan 2024 13:18:34 +0800 Subject: [PATCH v4 1/1] Detect more misuse of spin lock automatically Spin lock are intended for *very* short-term locks, but it is possible to be misused in many cases. e.g. Acquiring another LWLocks or regular locks, memory allocation. In this patch, all of such cases may increase the timing of holding the spin lock. In our culture, if any spin lock can't be acquired for some-time, a crash should happen. See the s_lock_stuck in perform_spin_delay. CHECK_FOR_INTERRUPTS should be avoided as well when holding a spin lock. Depends on what signals are left to handle, PG may raise error/fatal which would cause the code jump to some other places which is hardly to release the spin lock anyway. Because of this, elog(...) is not allowed since it calls CHECK_FOR_INTERRUPTS. --- src/backend/access/table/tableam.c | 1 + src/backend/storage/buffer/bufmgr.c | 1 + src/backend/storage/ipc/barrier.c | 1 + src/backend/storage/ipc/shm_toc.c | 1 + src/backend/storage/lmgr/lock.c | 2 ++ src/backend/storage/lmgr/lwlock.c | 2 ++ src/backend/utils/hash/dynahash.c | 1 + src/backend/utils/init/globals.c | 1 + src/backend/utils/mmgr/mcxt.c | 8 +++++ src/include/miscadmin.h | 56 +++++++++++++++++++++++++++++ src/include/storage/buf_internals.h | 1 + src/include/storage/spin.h | 16 +++++++-- 12 files changed, 88 insertions(+), 3 deletions(-) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 6ed8cca05a..6c6a65764c 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -24,6 +24,7 @@ #include "access/syncscan.h" #include "access/tableam.h" #include "access/xact.h" +#include "miscadmin.h" #include "optimizer/plancat.h" #include "port/pg_bitutils.h" #include "storage/bufmgr.h" diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7d601bef6d..c600a113cf 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -5409,6 +5409,7 @@ LockBufHdr(BufferDesc *desc) init_local_spin_delay(&delayStatus); + START_SPIN_LOCK(); while (true) { /* set BM_LOCKED flag */ diff --git a/src/backend/storage/ipc/barrier.c b/src/backend/storage/ipc/barrier.c index 5b52e060ba..63167a04bb 100644 --- a/src/backend/storage/ipc/barrier.c +++ b/src/backend/storage/ipc/barrier.c @@ -85,6 +85,7 @@ #include "postgres.h" #include "storage/barrier.h" +#include "miscadmin.h" static inline bool BarrierDetachImpl(Barrier *barrier, bool arrive); diff --git a/src/backend/storage/ipc/shm_toc.c b/src/backend/storage/ipc/shm_toc.c index 8db9d25aac..9cdec41054 100644 --- a/src/backend/storage/ipc/shm_toc.c +++ b/src/backend/storage/ipc/shm_toc.c @@ -13,6 +13,7 @@ #include "postgres.h" +#include "miscadmin.h" #include "port/atomics.h" #include "storage/shm_toc.h" #include "storage/spin.h" diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index c70a1adb9a..3b069b233a 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -776,6 +776,8 @@ LockAcquireExtended(const LOCKTAG *locktag, bool found_conflict; bool log_lock = false; + ASSERT_NO_SPIN_LOCK(); + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index b4b989ac56..6ef753f670 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -1205,6 +1205,8 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE); + ASSERT_NO_SPIN_LOCK(); + PRINT_LWDEBUG("LWLockAcquire", lock, mode); #ifdef LWLOCK_STATS diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index a4152080b5..504dae4d6c 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -98,6 +98,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "miscadmin.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 88b03e8fa3..b70105ef02 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -40,6 +40,7 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; +SpinLockState spinLockState = {0, 0, NULL}; int MyProcPid; pg_time_t MyStartTime; diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 1336944084..a315ef41de 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -1028,6 +1028,8 @@ MemoryContextAlloc(MemoryContext context, Size size) if (!AllocSizeIsValid(size)) elog(ERROR, "invalid memory alloc request size %zu", size); + ASSERT_NO_SPIN_LOCK(); + context->isReset = false; ret = context->methods->alloc(context, size); @@ -1071,6 +1073,8 @@ MemoryContextAllocZero(MemoryContext context, Size size) if (!AllocSizeIsValid(size)) elog(ERROR, "invalid memory alloc request size %zu", size); + ASSERT_NO_SPIN_LOCK(); + context->isReset = false; ret = context->methods->alloc(context, size); @@ -1197,6 +1201,8 @@ palloc(Size size) if (!AllocSizeIsValid(size)) elog(ERROR, "invalid memory alloc request size %zu", size); + ASSERT_NO_SPIN_LOCK(); + context->isReset = false; ret = context->methods->alloc(context, size); @@ -1228,6 +1234,8 @@ palloc0(Size size) if (!AllocSizeIsValid(size)) elog(ERROR, "invalid memory alloc request size %zu", size); + ASSERT_NO_SPIN_LOCK(); + context->isReset = false; ret = context->methods->alloc(context, size); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 0b01c1f093..13bdb2ccb8 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -104,6 +104,61 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount; extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount; extern PGDLLIMPORT volatile uint32 CritSectionCount; +typedef struct +{ + const char *file; + int line; +} SpinLockLoc; + +typedef struct +{ + volatile int size; + int allocSize; + SpinLockLoc *spinLocs; +} SpinLockState; + +extern SpinLockState spinLockState; + +#ifdef USE_ASSERT_CHECKING +#define START_SPIN_LOCK() \ +do { \ + if (spinLockState.size == spinLockState.allocSize) \ + { \ + int sz = spinLockState.size == 0 ? 2: spinLockState.size * 2; \ + /* use malloc instead of MemoryContextAlloc to avoid complex .h file dependencies. */ \ + void *nlocs = malloc(sizeof(SpinLockLoc) * sz); \ + if (spinLockState.size != 0) \ + { \ + memcpy(nlocs, spinLockState.spinLocs, sizeof(SpinLockLoc) * spinLockState.size); \ + free(spinLockState.spinLocs); \ + } \ + spinLockState.spinLocs = nlocs; \ + } \ + spinLockState.spinLocs[spinLockState.size].file = __FILE__; \ + spinLockState.spinLocs[spinLockState.size].line = __LINE__; \ + spinLockState.size++; \ +} while (0) + +#define END_SPIN_LOCK() \ +do { \ + Assert(spinLockState.size > 0); \ + spinLockState.size--; \ +} while(0) + +#define ASSERT_NO_SPIN_LOCK() \ +do { \ + if (spinLockState.size != 0) \ + elog(PANIC, \ + "inappropriate action is taken when holding a spin lock at %s:%d", \ + spinLockState.spinLocs[spinLockState.size-1].file, \ + spinLockState.spinLocs[spinLockState.size-1].line); \ +} while(0) +#else +#define START_SPIN_LOCK() ((void) true) +#define END_SPIN_LOCK() ((void) true) +#define ASSERT_NO_SPIN_LOCK() ((void) true) +#endif + /* in tcop/postgres.c */ extern void ProcessInterrupts(void); @@ -120,6 +175,7 @@ extern void ProcessInterrupts(void); /* Service interrupt, if one is pending and it's safe to service it now */ #define CHECK_FOR_INTERRUPTS() \ do { \ + ASSERT_NO_SPIN_LOCK(); \ if (INTERRUPTS_PENDING_CONDITION()) \ ProcessInterrupts(); \ } while(0) diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index e43e616579..ad7e1d0a21 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -363,6 +363,7 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state) { pg_write_barrier(); pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED)); + END_SPIN_LOCK(); } /* in bufmgr.c */ diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h index c0679c5999..aeade8cc1c 100644 --- a/src/include/storage/spin.h +++ b/src/include/storage/spin.h @@ -56,12 +56,22 @@ #include "storage/pg_sema.h" #endif - #define SpinLockInit(lock) S_INIT_LOCK(lock) -#define SpinLockAcquire(lock) S_LOCK(lock) +#define SpinLockAcquire(lock) \ +do \ +{ \ + START_SPIN_LOCK(); \ + S_LOCK(lock); \ +} while (false) + -#define SpinLockRelease(lock) S_UNLOCK(lock) +#define SpinLockRelease(lock) \ +do \ +{ \ + S_UNLOCK(lock); \ + END_SPIN_LOCK(); \ +} while (false) #define SpinLockFree(lock) S_LOCK_FREE(lock) -- 2.34.1