At Mon, 8 Aug 2022 11:53:19 -0400, Greg Stark <st...@mit.edu> wrote in > I'm trying to wrap my head around the shared memory stats collector > infrastructure from > <20220406030008.2qxipjxo776dw...@alap3.anarazel.de> committed in > 5891c7a8ed8f2d3d577e7eea34dacff12d7b6bbd. > > I have one question about locking -- afaics there's nothing protecting > reading the shared memory stats. There is an lwlock protecting > concurrent updates of the shared memory stats, but that lock isn't > taken when we read the stats. Are we ok relying on atomic 64-bit reads > or is there something else going on that I'm missing? > > In particular I'm looking at pgstat.c:847 in pgstat_fetch_entry() > which does this: > > memcpy(stats_data, > pgstat_get_entry_data(kind, entry_ref->shared_stats), > kind_info->shared_data_len); > > stats_data is the returned copy of the stats entry with all the > statistics in it. But it's copied from the shared memory location > directly using memcpy and there's no locking or change counter or > anything protecting this memcpy that I can see.
We take LW_SHARED while creating a snapshot of fixed-numbered stats. On the other hand we don't for variable-numbered stats. I agree to you, that we need that also for variable-numbered stats. If I'm not missing something, it's strange that pgstat_lock_entry() only takes LW_EXCLUSIVE. The atached changes the interface of pgstat_lock_entry() but there's only one user since another read of shared stats entry is not using reference. Thus the interface change might be too much. If I just add bare LWLockAcquire/Release() to pgstat_fetch_entry,the amount of the patch will be reduced. regards. -- Kyotaro Horiguchi NTT Open Source Software Center
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 88e5dd1b2b..7c4e5f0238 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -844,9 +844,11 @@ pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid) else stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context, kind_info->shared_data_len); + pgstat_lock_entry(entry_ref, LW_SHARED, false); memcpy(stats_data, pgstat_get_entry_data(kind, entry_ref->shared_stats), kind_info->shared_data_len); + pgstat_unlock_entry(entry_ref); if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE) { @@ -983,9 +985,15 @@ pgstat_build_snapshot(void) entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context, kind_info->shared_size); + /* + * We're directly accesing the shared stats entry not using + * reference. pg_stat_lock_entry() cannot be used here. + */ + LWLockAcquire(&stats_data->lock, LW_SHARED); memcpy(entry->data, pgstat_get_entry_data(kind, stats_data), kind_info->shared_size); + LWLockRelease(&stats_data->lock); } dshash_seq_term(&hstat); diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c index d9275611f0..fdf4d022c1 100644 --- a/src/backend/utils/activity/pgstat_database.c +++ b/src/backend/utils/activity/pgstat_database.c @@ -364,7 +364,7 @@ pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) pendingent = (PgStat_StatDBEntry *) entry_ref->pending; sharedent = (PgStatShared_Database *) entry_ref->shared_stats; - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, nowait)) return false; #define PGSTAT_ACCUM_DBCOUNT(item) \ diff --git a/src/backend/utils/activity/pgstat_function.c b/src/backend/utils/activity/pgstat_function.c index 427d8c47fc..318db0b3c8 100644 --- a/src/backend/utils/activity/pgstat_function.c +++ b/src/backend/utils/activity/pgstat_function.c @@ -200,7 +200,7 @@ pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) /* localent always has non-zero content */ - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, nowait)) return false; shfuncent->stats.f_numcalls += localent->f_counts.f_numcalls; diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index a846d9ffb6..98dda726db 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -782,7 +782,7 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) return true; } - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, nowait)) return false; /* add the values to the shared entry. */ diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index 89060ef29a..fdd20d80a1 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -568,14 +568,14 @@ pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, } bool -pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait) +pgstat_lock_entry(PgStat_EntryRef *entry_ref, LWLockMode mode, bool nowait) { LWLock *lock = &entry_ref->shared_stats->lock; if (nowait) - return LWLockConditionalAcquire(lock, LW_EXCLUSIVE); + return LWLockConditionalAcquire(lock, mode); - LWLockAcquire(lock, LW_EXCLUSIVE); + LWLockAcquire(lock, mode); return true; } @@ -598,7 +598,7 @@ pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid, entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL); /* lock the shared entry to protect the content, skip if failed */ - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, nowait)) return NULL; return entry_ref; @@ -920,7 +920,7 @@ pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts) if (!entry_ref || entry_ref->shared_entry->dropped) return; - (void) pgstat_lock_entry(entry_ref, false); + (void) pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, false); shared_stat_reset_contents(kind, entry_ref->shared_stats, ts); pgstat_unlock_entry(entry_ref); } diff --git a/src/backend/utils/activity/pgstat_subscription.c b/src/backend/utils/activity/pgstat_subscription.c index e1072bd5ba..eeb2e3370f 100644 --- a/src/backend/utils/activity/pgstat_subscription.c +++ b/src/backend/utils/activity/pgstat_subscription.c @@ -91,7 +91,7 @@ pgstat_subscription_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) /* localent always has non-zero content */ - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_lock_entry(entry_ref, LW_EXCLUSIVE, nowait)) return false; #define SUB_ACC(fld) shsubent->stats.fld += localent->fld diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 9303d05427..fe1ac0f274 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -580,7 +580,7 @@ extern void pgstat_detach_shmem(void); extern PgStat_EntryRef *pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create, bool *found); -extern bool pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait); +extern bool pgstat_lock_entry(PgStat_EntryRef *entry_ref, LWLockMode mode, bool nowait); extern void pgstat_unlock_entry(PgStat_EntryRef *entry_ref); extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid); extern void pgstat_drop_all_entries(void);