On Thu, Mar 19, 2020 at 03:44:49PM -0700, Andres Freund wrote:
> But uh, unfortunately the vacuum delay code just sleeps without setting
> a wait event:
> ...
> Seems like it should instead use a new wait event in the PG_WAIT_TIMEOUT
> class?
>
> Given how frequently we run into trouble with [auto]vacuum throttling
> being a problem, and there not being any way to monitor that currently,
> that seems like it'd be a significant improvement, given the effort?
I see that pg_sleep sets WAIT_EVENT_PG_SLEEP, but pg_usleep doesn't, I guess because the overhead is significant for a small number of usecs, and because it doesn't work for pg_usleep to set a generic event if callers want to be able to set a more specific wait event.

Also, I noticed that the SLEEP_ON_ASSERT comment (31338352b) wants to use pg_usleep, "which seems too short".  Surely it should use pg_sleep, added at 782eefc58 a few years later?

Also, there was a suggestion recently that this should have a separate vacuum_progress phase:

|vacuumlazy.c:#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50      /* ms */
|vacuumlazy.c:        pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);

I was planning to look at that eventually; should it have a wait event instead, or in addition?

> It'd probably also be helpful to report the total time [auto]vacuum
> spent being delayed for vacuum verbose/autovacuum logging, but imo
> that'd be a parallel feature to a wait event, not a replacement.

This requires wider changes than I anticipated.

2020-03-20 22:35:51.308 CDT [16534] LOG:  automatic aggressive vacuum of table "template1.pg_catalog.pg_class": index scans: 1
        pages: 0 removed, 11 remain, 0 skipped due to pins, 0 skipped frozen
        tuples: 6 removed, 405 remain, 0 are dead but not yet removable, oldest xmin: 1574
        buffer usage: 76 hits, 7 misses, 8 dirtied
        avg read rate: 16.378 MB/s, avg write rate: 18.718 MB/s
        Cost-based delay: 2 msec
        system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s

VACUUM VERBOSE wouldn't normally be run with cost_delay > 0, so that field will typically be zero; I made it conditional, which is presumably why it's written like that, even though that hasn't otherwise been used since 17eaae98.

Added at https://commitfest.postgresql.org/28/2515/

-- 
Justin
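PS: For illustration only (not part of the patches): to actually get a nonzero "Cost-based delay" out of a manual VACUUM VERBOSE, the command has to be throttled, e.g. something like:

    SET vacuum_cost_delay = '2ms';   -- defaults to 0 for manual VACUUM, i.e. no throttling
    VACUUM (VERBOSE) pg_catalog.pg_class;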
>From 68c5ad8c7a9feb0c68afad310e3f52c21c3cdbaf Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 20 Mar 2020 20:47:30 -0500
Subject: [PATCH v1 1/2] Report wait event for cost-based vacuum delay

---
 doc/src/sgml/monitoring.sgml    | 2 ++
 src/backend/commands/vacuum.c   | 2 ++
 src/backend/postmaster/pgstat.c | 3 +++
 src/include/pgstat.h            | 3 ++-
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/backend/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 5bffdcce10..46c99a55b7 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1507,6 +1507,8 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
          (<filename>pg_wal</filename>, archive or stream) before trying
          again to retrieve WAL data, at recovery.
         </entry>
+        <entry><literal>VacuumDelay</literal></entry>
+        <entry>Waiting in a cost-based vacuum delay point.</entry>
        </row>
        <row>
         <entry morerows="68"><literal>IO</literal></entry>
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d625d17bf4..59731d687f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2019,7 +2019,9 @@ vacuum_delay_point(void)
 		if (msec > VacuumCostDelay * 4)
 			msec = VacuumCostDelay * 4;
 
+		pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
 		pg_usleep((long) (msec * 1000));
+		pgstat_report_wait_end();
 
 		VacuumCostBalance = 0;
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index d29c211a76..742ec07b59 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3824,6 +3824,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
 		case WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL:
 			event_name = "RecoveryRetrieveRetryInterval";
 			break;
+		case WAIT_EVENT_VACUUM_DELAY:
+			event_name = "VacuumDelay";
+			break;
 			/* no default case, so that compiler will warn */
 	}
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 851d0a7246..4db40e23cc 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -848,7 +848,8 @@ typedef enum
 	WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
 	WAIT_EVENT_PG_SLEEP,
 	WAIT_EVENT_RECOVERY_APPLY_DELAY,
-	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL
+	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
+	WAIT_EVENT_VACUUM_DELAY,
 } WaitEventTimeout;
 
 /* ----------
-- 
2.17.0
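For illustration only (not part of the patch): with the 0001 patch applied, a backend sleeping in vacuum_delay_point() should show up in pg_stat_activity like any other Timeout-class wait, e.g.:

    -- wait_event name as defined by the patch above
    SELECT pid, backend_type, wait_event_type, wait_event
    FROM pg_stat_activity
    WHERE wait_event = 'VacuumDelay';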
>From 8153d909cabb94474890f0b55c7733f33923e3c5 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 20 Mar 2020 22:08:09 -0500
Subject: [PATCH v1 2/2] vacuum to report time spent in cost-based delay

---
 src/backend/access/gin/ginfast.c      |  6 +++---
 src/backend/access/gin/ginvacuum.c    |  6 +++---
 src/backend/access/gist/gistvacuum.c  |  2 +-
 src/backend/access/hash/hash.c        |  2 +-
 src/backend/access/heap/vacuumlazy.c  | 17 +++++++++++------
 src/backend/access/nbtree/nbtree.c    |  2 +-
 src/backend/access/spgist/spgvacuum.c |  4 ++--
 src/backend/commands/vacuum.c         |  8 ++++++--
 src/include/access/genam.h            |  1 +
 src/include/commands/vacuum.h         |  2 +-
 10 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 11d7ec067a..c99dc4a8be 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -892,7 +892,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		 */
 		processPendingPage(&accum, &datums, page, FirstOffsetNumber);
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		/*
 		 * Is it time to flush memory to disk?  Flush if we are at the end of
@@ -929,7 +929,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		{
 			ginEntryInsert(ginstate, attnum, key, category,
 						   list, nlist, NULL);
-			vacuum_delay_point();
+			stats->delay_msec += vacuum_delay_point();
 		}
 
 		/*
@@ -1002,7 +1002,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		/*
 		 * Read next page in pending list
 		 */
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 		buffer = ReadBuffer(index, blkno);
 		LockBuffer(buffer, GIN_SHARE);
 		page = BufferGetPage(buffer);
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 8ae4fd95a7..3ab88153cb 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -662,12 +662,12 @@ ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 			UnlockReleaseBuffer(buffer);
 		}
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		for (i = 0; i < nRoot; i++)
 		{
 			ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
-			vacuum_delay_point();
+			stats->delay_msec += vacuum_delay_point();
 		}
 
 		if (blkno == InvalidBlockNumber)	/* rightmost page */
@@ -748,7 +748,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 		Buffer		buffer;
 		Page		page;
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
 									RBM_NORMAL, info->strategy);
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index a9c616c772..d58b11486c 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -265,7 +265,7 @@ restart:
 	recurse_to = InvalidBlockNumber;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	vstate->stats->delay_msec += vacuum_delay_point();
 
 	buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
 								info->strategy);
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 4871b7ff4d..86a9c7fdaa 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -709,7 +709,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		bool		retain_pin = false;
 		bool		clear_dead_marking = false;
 
-		vacuum_delay_point();
+		// XXX stats->delay_msec += vacuum_delay_point();
 
 		page = BufferGetPage(buf);
 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 03c43efc32..b03b066aa4 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -285,6 +285,7 @@ typedef struct LVRelStats
 	double		new_dead_tuples;	/* new estimated total # of dead tuples */
 	BlockNumber pages_removed;
 	double		tuples_deleted;
+	double		delay_msec;		/* milliseconds delay by vacuum_delay_point() */
 	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
 	LVDeadTuples *dead_tuples;
 	int			num_index_scans;
@@ -464,6 +465,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 	vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
 	vacrelstats->num_index_scans = 0;
 	vacrelstats->pages_removed = 0;
+	vacrelstats->delay_msec = 0;
 	vacrelstats->lock_waiter_detected = false;
 
 	/* Open all indexes of the relation */
@@ -620,8 +622,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 							 (long long) VacuumPageDirty);
 			appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
 							 read_rate, write_rate);
-			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
-
+			appendStringInfo(&buf, _("system usage: %s, cost-based delay: %.0f msec"),
+							 pg_rusage_show(&ru0), vacrelstats->delay_msec);
 			ereport(LOG,
 					(errmsg_internal("%s", buf.data)));
 			pfree(buf.data);
@@ -860,7 +862,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 				if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
 					break;
 			}
-			vacuum_delay_point();
+			vacrelstats->delay_msec += vacuum_delay_point();
 			next_unskippable_block++;
 		}
 	}
@@ -916,7 +918,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 				if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
 					break;
 			}
-			vacuum_delay_point();
+			vacrelstats->delay_msec += vacuum_delay_point();
 			next_unskippable_block++;
 		}
 	}
@@ -966,7 +968,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 				all_visible_according_to_vm = true;
 		}
 
-		vacuum_delay_point();
+		vacrelstats->delay_msec += vacuum_delay_point();
 
 		/*
 		 * If we are close to overrunning the available space for dead-tuple
@@ -1690,6 +1692,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 									 "%u pages are entirely empty.\n",
 									 empty_pages),
 					 empty_pages);
+	if (vacrelstats->delay_msec > 0)
+		appendStringInfo(&buf, "Cost-based delay: %.0f msec\n",
+						 vacrelstats->delay_msec);
 	appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
 
 	ereport(elevel,
@@ -1788,7 +1793,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 		Page		page;
 		Size		freespace;
 
-		vacuum_delay_point();
+		vacrelstats->delay_msec += vacuum_delay_point();
 
 		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 4bb16297c3..7ac8f6df3c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1098,7 +1098,7 @@ restart:
 	recurse_to = P_NONE;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	vstate->stats->delay_msec += vacuum_delay_point();
 
 	/*
 	 * We can't use _bt_getbuf() here because it always applies
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index bd98707f3c..d531718a89 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -611,7 +611,7 @@ spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
 	Page		page;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	bds->stats->delay_msec += vacuum_delay_point();
 
 	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
 								RBM_NORMAL, bds->info->strategy);
@@ -690,7 +690,7 @@ spgprocesspending(spgBulkDeleteState *bds)
 			continue;			/* ignore already-done items */
 
 		/* call vacuum_delay_point while not holding any buffer lock */
-		vacuum_delay_point();
+		bds->stats->delay_msec += vacuum_delay_point();
 
 		/* examine the referenced page */
 		blkno = ItemPointerGetBlockNumber(&pitem->tid);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 59731d687f..314513512c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1990,10 +1990,11 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
 /*
  * vacuum_delay_point --- check for interrupts and cost-based delay.
  *
+ * Return the number of milliseconds delayed.
  * This should be called in each major loop of VACUUM processing,
  * typically once per page processed.
  */
-void
+double
 vacuum_delay_point(void)
 {
 	double		msec = 0;
@@ -2002,7 +2003,7 @@ vacuum_delay_point(void)
 	CHECK_FOR_INTERRUPTS();
 
 	if (!VacuumCostActive || InterruptPending)
-		return;
+		return 0;
 
 	/*
 	 * For parallel vacuum, the delay is computed based on the shared cost
@@ -2030,7 +2031,10 @@ vacuum_delay_point(void)
 
 		/* Might have gotten an interrupt while sleeping */
 		CHECK_FOR_INTERRUPTS();
+		return msec;
 	}
+
+	return 0;
 }
 
 /*
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 7e9364a50c..1cbd651173 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -78,6 +78,7 @@ typedef struct IndexBulkDeleteResult
 	double		tuples_removed; /* # removed during vacuum operation */
 	BlockNumber pages_deleted;	/* # unused pages in index */
 	BlockNumber pages_free;		/* # pages available for reuse */
+	double		delay_msec;		/* milliseconds delay by vacuum_delay_point() */
 } IndexBulkDeleteResult;
 
 /* Typedef for callback function to determine if a tuple is bulk-deletable */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 2779bea5c9..856714034b 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -273,7 +273,7 @@ extern void vacuum_set_xid_limits(Relation rel,
 									  MultiXactId *multiXactCutoff,
 									  MultiXactId *mxactFullScanLimit);
 extern void vac_update_datfrozenxid(void);
-extern void vacuum_delay_point(void);
+extern double vacuum_delay_point(void);
 extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple,
 									 int options);
 extern Relation vacuum_open_relation(Oid relid, RangeVar *relation,
-- 
2.17.0