Hi, The lwlock wait queue scalability issue [1] was quite hard to find because of the exponential backoff and because we adjust spins_per_delay over time within a backend.
I think the least we could do to make this easier would be to signal spin delays as wait events. We surely don't want to do so for non-contended spins because of the overhead, but once we get to the point of calling pg_usleep() that's not an issue. I don't think it's worth trying to hand down more detailed information about the specific spinlock we're waiting on, at least for now. We'd have to invent a whole lot of new wait events because most spinlocks don't have ones yet. I couldn't quite decide what wait_event_type to best group this under? In the attached patch I put it under timeouts, which doesn't seem awful. I reverted a4adc31f690 and indeed it shows SpinDelay wait events before the fix and not after. Greetings, Andres Freund [1] https://postgr.es/m/20221027165914.2hofzp4cvutj6gin%40awork3.anarazel.de
>From 6d7c62e5ebbe9bf906e5f4e1682ece6fff69cd37 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Sun, 20 Nov 2022 12:39:14 -0800 Subject: [PATCH] Add WAIT_EVENT_SPIN_DELAY --- src/include/utils/wait_event.h | 3 ++- src/backend/storage/lmgr/s_lock.c | 8 ++++++++ src/backend/utils/activity/wait_event.c | 3 +++ doc/src/sgml/monitoring.sgml | 4 ++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index 6f2d5612e06..3d87d550119 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -146,7 +146,8 @@ typedef enum WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL, WAIT_EVENT_REGISTER_SYNC_REQUEST, WAIT_EVENT_VACUUM_DELAY, - WAIT_EVENT_VACUUM_TRUNCATE + WAIT_EVENT_VACUUM_TRUNCATE, + WAIT_EVENT_SPIN_DELAY } WaitEventTimeout; /* ---------- diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index 4e473ec27ec..b04930db760 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -53,6 +53,7 @@ #include "common/pg_prng.h" #include "port/atomics.h" #include "storage/s_lock.h" +#include "utils/wait_event.h" #define MIN_SPINS_PER_DELAY 10 #define MAX_SPINS_PER_DELAY 1000 @@ -136,7 +137,14 @@ perform_spin_delay(SpinDelayStatus *status) if (status->cur_delay == 0) /* first time to delay? */ status->cur_delay = MIN_DELAY_USEC; + /* + * Once we start sleeping, the overhead of reporting a wait event is + * justified. Actively spinning easily stands out in profilers, but + * sleeping with an exponential backoff is harder to spot... + */ + pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY); pg_usleep(status->cur_delay); + pgstat_report_wait_end(); #if defined(S_LOCK_TEST) fprintf(stdout, "*"); diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index 92f24a6c9bc..02e953420e5 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -503,6 +503,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w) case WAIT_EVENT_VACUUM_TRUNCATE: event_name = "VacuumTruncate"; break; + case WAIT_EVENT_SPIN_DELAY: + event_name = "SpinDelay"; + break; /* no default case, so that compiler will warn */ } diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index e5d622d5147..0599eac71a6 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2298,6 +2298,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser <entry>Waiting to acquire an exclusive lock to truncate off any empty pages at the end of a table vacuumed.</entry> </row> + <row> + <entry><literal>SpinDelay</literal></entry> + <entry>Waiting to acquire a contended spinlock.</entry> + </row> </tbody> </tgroup> </table> -- 2.38.0