On Thu, Mar 19, 2020 at 03:44:49PM -0700, Andres Freund wrote:
> But uh, unfortunately the vacuum delay code just sleeps without setting
> a wait event:
...
> Seems like it should instead use a new wait event in the PG_WAIT_TIMEOUT
> class?
> 
> Given how frequently we run into trouble with [auto]vacuum throttling
> being a problem, and there not being any way to monitor that currently,
> that seems like it'd be a significant improvement, given the effort?

I see that pg_sleep sets WAIT_EVENT_PG_SLEEP, but pg_usleep doesn't, I guess
because the overhead is significant for a small number of usecs, and because it
doesn't work for pg_usleep to set a generic event if callers want to be able to
set a more specific wait event.

Also, I noticed that SLEEP_ON_ASSERT comment (31338352b) wants to use pg_usleep
"which seems too short.".  Surely it should use pg_sleep, added at 782eefc58 a
few years later ?

Also, there was a suggestion recently that this should have a separate
vacuum_progress phase:
|vacuumlazy.c:#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL        50 /* ms */
|vacuumlazy.c:pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);

I was planning to look at that eventually ; should it have a wait event instead
or in addition ?

> It'd probably also be helpful to report the total time [auto]vacuum
> spent being delayed for vacuum verbose/autovacuum logging, but imo
> that'd be a parallel feature to a wait event, not a replacement.

This requires wider changes than I anticipated.

2020-03-20 22:35:51.308 CDT [16534] LOG:  automatic aggressive vacuum of table 
"template1.pg_catalog.pg_class": index scans: 1
        pages: 0 removed, 11 remain, 0 skipped due to pins, 0 skipped frozen
        tuples: 6 removed, 405 remain, 0 are dead but not yet removable, oldest 
xmin: 1574
        buffer usage: 76 hits, 7 misses, 8 dirtied
        avg read rate: 16.378 MB/s, avg write rate: 18.718 MB/s
        Cost-based delay: 2 msec
        system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s

VACUUM VERBOSE wouldn't normally be run with cost_delay > 0, so that field will
typically be zero, so I made it conditional, which is supposedly why it's
written like that, even though that's not otherwise being used since 17eaae98.

Added at https://commitfest.postgresql.org/28/2515/

-- 
Justin
>From 68c5ad8c7a9feb0c68afad310e3f52c21c3cdbaf Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 20 Mar 2020 20:47:30 -0500
Subject: [PATCH v1 1/2] Report wait event for cost-based vacuum delay

---
 doc/src/sgml/monitoring.sgml    | 2 ++
 src/backend/commands/vacuum.c   | 2 ++
 src/backend/postmaster/pgstat.c | 3 +++
 src/include/pgstat.h            | 3 ++-
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 5bffdcce10..46c99a55b7 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1507,6 +1507,8 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
           (<filename>pg_wal</filename>, archive or stream) before trying
           again to retrieve WAL data, at recovery.
          </entry>
+         <entry><literal>VacuumDelay</literal></entry>
+         <entry>Waiting in a cost-based vacuum delay point.</entry>
         </row>
         <row>
          <entry morerows="68"><literal>IO</literal></entry>
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d625d17bf4..59731d687f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2019,7 +2019,9 @@ vacuum_delay_point(void)
 		if (msec > VacuumCostDelay * 4)
 			msec = VacuumCostDelay * 4;
 
+		pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
 		pg_usleep((long) (msec * 1000));
+		pgstat_report_wait_end();
 
 		VacuumCostBalance = 0;
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index d29c211a76..742ec07b59 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3824,6 +3824,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
 		case WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL:
 			event_name = "RecoveryRetrieveRetryInterval";
 			break;
+		case WAIT_EVENT_VACUUM_DELAY:
+			event_name = "VacuumDelay";
+			break;
 			/* no default case, so that compiler will warn */
 	}
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 851d0a7246..4db40e23cc 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -848,7 +848,8 @@ typedef enum
 	WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
 	WAIT_EVENT_PG_SLEEP,
 	WAIT_EVENT_RECOVERY_APPLY_DELAY,
-	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL
+	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
+	WAIT_EVENT_VACUUM_DELAY,
 } WaitEventTimeout;
 
 /* ----------
-- 
2.17.0

>From 8153d909cabb94474890f0b55c7733f33923e3c5 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 20 Mar 2020 22:08:09 -0500
Subject: [PATCH v1 2/2] vacuum to report time spent in cost-based delay

---
 src/backend/access/gin/ginfast.c      |  6 +++---
 src/backend/access/gin/ginvacuum.c    |  6 +++---
 src/backend/access/gist/gistvacuum.c  |  2 +-
 src/backend/access/hash/hash.c        |  2 +-
 src/backend/access/heap/vacuumlazy.c  | 17 +++++++++++------
 src/backend/access/nbtree/nbtree.c    |  2 +-
 src/backend/access/spgist/spgvacuum.c |  4 ++--
 src/backend/commands/vacuum.c         |  8 ++++++--
 src/include/access/genam.h            |  1 +
 src/include/commands/vacuum.h         |  2 +-
 10 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 11d7ec067a..c99dc4a8be 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -892,7 +892,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		 */
 		processPendingPage(&accum, &datums, page, FirstOffsetNumber);
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		/*
 		 * Is it time to flush memory to disk?	Flush if we are at the end of
@@ -929,7 +929,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 			{
 				ginEntryInsert(ginstate, attnum, key, category,
 							   list, nlist, NULL);
-				vacuum_delay_point();
+				stats->delay_msec += vacuum_delay_point();
 			}
 
 			/*
@@ -1002,7 +1002,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean,
 		/*
 		 * Read next page in pending list
 		 */
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 		buffer = ReadBuffer(index, blkno);
 		LockBuffer(buffer, GIN_SHARE);
 		page = BufferGetPage(buffer);
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 8ae4fd95a7..3ab88153cb 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -662,12 +662,12 @@ ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 			UnlockReleaseBuffer(buffer);
 		}
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		for (i = 0; i < nRoot; i++)
 		{
 			ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
-			vacuum_delay_point();
+			stats->delay_msec += vacuum_delay_point();
 		}
 
 		if (blkno == InvalidBlockNumber)	/* rightmost page */
@@ -748,7 +748,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 		Buffer		buffer;
 		Page		page;
 
-		vacuum_delay_point();
+		stats->delay_msec += vacuum_delay_point();
 
 		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
 									RBM_NORMAL, info->strategy);
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index a9c616c772..d58b11486c 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -265,7 +265,7 @@ restart:
 	recurse_to = InvalidBlockNumber;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	vstate->stats->delay_msec += vacuum_delay_point();
 
 	buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
 								info->strategy);
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 4871b7ff4d..86a9c7fdaa 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -709,7 +709,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		bool		retain_pin = false;
 		bool		clear_dead_marking = false;
 
-		vacuum_delay_point();
+		// XXX stats->delay_msec += vacuum_delay_point();
 
 		page = BufferGetPage(buf);
 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 03c43efc32..b03b066aa4 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -285,6 +285,7 @@ typedef struct LVRelStats
 	double		new_dead_tuples;	/* new estimated total # of dead tuples */
 	BlockNumber pages_removed;
 	double		tuples_deleted;
+	double		delay_msec;	/* milliseconds delay by vacuum_delay_point() */
 	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
 	LVDeadTuples *dead_tuples;
 	int			num_index_scans;
@@ -464,6 +465,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 	vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
 	vacrelstats->num_index_scans = 0;
 	vacrelstats->pages_removed = 0;
+	vacrelstats->delay_msec = 0;
 	vacrelstats->lock_waiter_detected = false;
 
 	/* Open all indexes of the relation */
@@ -620,8 +622,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 							 (long long) VacuumPageDirty);
 			appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
 							 read_rate, write_rate);
-			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
-
+			appendStringInfo(&buf, _("system usage: %s, cost-based delay: %.0f msec"),
+							 pg_rusage_show(&ru0), vacrelstats->delay_msec);
 			ereport(LOG,
 					(errmsg_internal("%s", buf.data)));
 			pfree(buf.data);
@@ -860,7 +862,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 				if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
 					break;
 			}
-			vacuum_delay_point();
+			vacrelstats->delay_msec += vacuum_delay_point();
 			next_unskippable_block++;
 		}
 	}
@@ -916,7 +918,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 						if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
 							break;
 					}
-					vacuum_delay_point();
+					vacrelstats->delay_msec += vacuum_delay_point();
 					next_unskippable_block++;
 				}
 			}
@@ -966,7 +968,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			all_visible_according_to_vm = true;
 		}
 
-		vacuum_delay_point();
+		vacrelstats->delay_msec += vacuum_delay_point();
 
 		/*
 		 * If we are close to overrunning the available space for dead-tuple
@@ -1690,6 +1692,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 									"%u pages are entirely empty.\n",
 									empty_pages),
 					 empty_pages);
+	if (vacrelstats->delay_msec > 0)
+		appendStringInfo(&buf, "Cost-based delay: %.0f msec\n",
+										vacrelstats->delay_msec);
 	appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
 
 	ereport(elevel,
@@ -1788,7 +1793,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 		Page		page;
 		Size		freespace;
 
-		vacuum_delay_point();
+		vacrelstats->delay_msec += vacuum_delay_point();
 
 		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 4bb16297c3..7ac8f6df3c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1098,7 +1098,7 @@ restart:
 	recurse_to = P_NONE;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	vstate->stats->delay_msec += vacuum_delay_point();
 
 	/*
 	 * We can't use _bt_getbuf() here because it always applies
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index bd98707f3c..d531718a89 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -611,7 +611,7 @@ spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
 	Page		page;
 
 	/* call vacuum_delay_point while not holding any buffer lock */
-	vacuum_delay_point();
+	bds->stats->delay_msec += vacuum_delay_point();
 
 	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
 								RBM_NORMAL, bds->info->strategy);
@@ -690,7 +690,7 @@ spgprocesspending(spgBulkDeleteState *bds)
 			continue;			/* ignore already-done items */
 
 		/* call vacuum_delay_point while not holding any buffer lock */
-		vacuum_delay_point();
+		bds->stats->delay_msec += vacuum_delay_point();
 
 		/* examine the referenced page */
 		blkno = ItemPointerGetBlockNumber(&pitem->tid);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 59731d687f..314513512c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1990,10 +1990,11 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
 /*
  * vacuum_delay_point --- check for interrupts and cost-based delay.
  *
+ * Return the number of milliseconds delayed.
  * This should be called in each major loop of VACUUM processing,
  * typically once per page processed.
  */
-void
+double
 vacuum_delay_point(void)
 {
 	double		msec = 0;
@@ -2002,7 +2003,7 @@ vacuum_delay_point(void)
 	CHECK_FOR_INTERRUPTS();
 
 	if (!VacuumCostActive || InterruptPending)
-		return;
+		return 0;
 
 	/*
 	 * For parallel vacuum, the delay is computed based on the shared cost
@@ -2030,7 +2031,10 @@ vacuum_delay_point(void)
 
 		/* Might have gotten an interrupt while sleeping */
 		CHECK_FOR_INTERRUPTS();
+		return msec;
 	}
+
+	return 0;
 }
 
 /*
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 7e9364a50c..1cbd651173 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -78,6 +78,7 @@ typedef struct IndexBulkDeleteResult
 	double		tuples_removed; /* # removed during vacuum operation */
 	BlockNumber pages_deleted;	/* # unused pages in index */
 	BlockNumber pages_free;		/* # pages available for reuse */
+	double		delay_msec;		/* milliseconds delay by vacuum_delay_point() */
 } IndexBulkDeleteResult;
 
 /* Typedef for callback function to determine if a tuple is bulk-deletable */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 2779bea5c9..856714034b 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -273,7 +273,7 @@ extern void vacuum_set_xid_limits(Relation rel,
 								  MultiXactId *multiXactCutoff,
 								  MultiXactId *mxactFullScanLimit);
 extern void vac_update_datfrozenxid(void);
-extern void vacuum_delay_point(void);
+extern double vacuum_delay_point(void);
 extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple,
 									 int options);
 extern Relation vacuum_open_relation(Oid relid, RangeVar *relation,
-- 
2.17.0

Reply via email to