From b7f14a0925aba3c25e00d4be19feae503bab5add Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Fri, 25 Nov 2022 11:23:20 -0800
Subject: [PATCH v1] Add "table age" trigger concept to autovacuum.

Teach autovacuum.c to launch "table age" autovacuums at the same point
that it previously triggered antiwraparound autovacuums.  Antiwraparound
autovacuums are retained, but are only used as a true option of last
resort, when regular autovacuum has presumably tried and failed to
advance relfrozenxid (likely because the auto-cancel behavior kept
cancelling regular autovacuums triggered based on table age).  The
special auto-cancellation behavior applied by antiwraparound autovacuums
is known to cause problems in the field, so it should be avoided until
it proves necessary.  It should only be used when the risk of disruptive
lock conflicts (perhaps due to automated DDL run by the application)
starts to look like the lesser risk compared to the risk of the system
reaching xidStopLimit.

Also add new instrumentation that lists a triggering condition in the
server log whenever an autovacuum is logged.  This reports "table age"
as the triggering criterion, for both regular and antiwraparound
autovacuums where that's appropriate (though it's always appropriate
with antiwraparound autovacuums). In other cases the instrumentation
will report when an autovacuum was launched due to the table crossing
the threshold for tuples inserted or the threshold for dead tuples.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Jeff Davis <pgsql@j-davis.com>
Discussion: https://postgr.es/m/CAH2-Wz=S-R_2rO49Hm94Nuvhu9_twRGbTm6uwDRmRu-Sqn_t3w@mail.gmail.com
---
 src/include/commands/vacuum.h        |  15 +++-
 src/include/storage/proc.h           |   2 +-
 src/backend/access/heap/vacuumlazy.c |  12 +++
 src/backend/commands/vacuum.c        |   9 +-
 src/backend/postmaster/autovacuum.c  | 123 ++++++++++++++++++++-------
 src/backend/storage/lmgr/proc.c      |   4 +-
 6 files changed, 126 insertions(+), 39 deletions(-)

diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index b63751c46..074eebe19 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -189,6 +189,18 @@ typedef struct VacAttrStats
 #define VACOPT_PROCESS_TOAST 0x40	/* process the TOAST table, if any */
 #define VACOPT_DISABLE_PAGE_SKIPPING 0x80	/* don't skip any pages */
 
+/*
+ * Values used by autovacuum.c to tell vacuumlazy.c about the specific
+ * threshold type that triggered an autovacuum worker
+ */
+typedef enum AutoVacType
+{
+	AUTOVACUUM_NONE = 0,		/* no autovacuum trigger (user-invoked VACUUM) */
+	AUTOVACUUM_TABLEAGE,		/* relfrozenxid/relminmxid age cutoff crossed */
+	AUTOVACUUM_DEAD_TUPLES,		/* dead tuples threshold crossed */
+	AUTOVACUUM_INSERTED_TUPLES, /* inserted tuples threshold crossed */
+} AutoVacType;
+
 /*
  * Values used by index_cleanup and truncate params.
  *
@@ -220,7 +232,8 @@ typedef struct VacuumParams
 											 * use default */
 	int			multixact_freeze_table_age; /* multixact age at which to scan
 											 * whole table */
-	bool		is_wraparound;	/* force a for-wraparound vacuum */
+	bool		is_wraparound;	/* antiwraparound autovacuum */
+	AutoVacType	trigger;		/* autovacuum trigger condition, if any */
 	int			log_min_duration;	/* minimum execution threshold in ms at
 									 * which autovacuum is logged, -1 to use
 									 * default */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index aa13e1d66..32f3444e0 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -59,7 +59,7 @@ struct XidCache
 										 * CONCURRENTLY or REINDEX
 										 * CONCURRENTLY on non-expressional,
 										 * non-partial index */
-#define		PROC_VACUUM_FOR_WRAPAROUND	0x08	/* set by autovac only */
+#define		PROC_VACUUM_FOR_WRAPAROUND	0x08	/* emergency autovac */
 #define		PROC_IN_LOGICAL_DECODING	0x10	/* currently doing logical
 												 * decoding outside xact */
 #define		PROC_AFFECTS_ALL_HORIZONS	0x20	/* this proc's xmin must be
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index d59711b7e..62d21c22d 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -669,6 +669,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 				 * implies aggressive.  Produce distinct output for the corner
 				 * case all the same, just in case.
 				 */
+				Assert(params->trigger == AUTOVACUUM_TABLEAGE);
 				if (aggressive)
 					msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
 				else
@@ -686,6 +687,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 							 vacrel->relnamespace,
 							 vacrel->relname,
 							 vacrel->num_index_scans);
+			if (!verbose)
+			{
+				Assert(IsAutoVacuumWorkerProcess() &&
+					   params->trigger != AUTOVACUUM_NONE);
+				if (params->trigger == AUTOVACUUM_TABLEAGE)
+					appendStringInfo(&buf, _("autovacuum trigger: table age threshold\n"));
+				else if (params->trigger == AUTOVACUUM_DEAD_TUPLES)
+					appendStringInfo(&buf, _("autovacuum trigger: dead tuples threshold\n"));
+				else if (params->trigger == AUTOVACUUM_INSERTED_TUPLES)
+					appendStringInfo(&buf, _("autovacuum trigger: inserted tuples threshold\n"));
+			}
 			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
 							 vacrel->removed_pages,
 							 new_rel_pages,
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index b5d0ac161..3127dcc95 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -263,8 +263,9 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		params.multixact_freeze_table_age = -1;
 	}
 
-	/* user-invoked vacuum is never "for wraparound" */
+	/* user-invoked vacuum never uses these autovacuum-only flags */
 	params.is_wraparound = false;
+	params.trigger = AUTOVACUUM_NONE;
 
 	/* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
 	params.log_min_duration = -1;
@@ -1858,7 +1859,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 		 *
 		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
 		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
-		 * in an emergency.
+		 * because no earlier vacuum (in particular no earlier "table age"
+		 * autovacuum) ran and advanced relfrozenxid/relminmxid.
 		 *
 		 * Note: these flags remain set until CommitTransaction or
 		 * AbortTransaction.  We don't want to clear them until we reset
@@ -1870,7 +1872,10 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 		MyProc->statusFlags |= PROC_IN_VACUUM;
 		if (params->is_wraparound)
+		{
+			Assert(params->trigger == AUTOVACUUM_TABLEAGE);
 			MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
+		}
 		ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
 		LWLockRelease(ProcArrayLock);
 	}
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 601834d4b..215d07aec 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -327,15 +327,17 @@ static void FreeWorkerInfo(int code, Datum arg);
 static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 											TupleDesc pg_class_desc,
 											int effective_multixact_freeze_max_age);
-static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts,
-											  Form_pg_class classForm,
-											  int effective_multixact_freeze_max_age,
-											  bool *dovacuum, bool *doanalyze, bool *wraparound);
-static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
-									  Form_pg_class classForm,
-									  PgStat_StatTabEntry *tabentry,
-									  int effective_multixact_freeze_max_age,
-									  bool *dovacuum, bool *doanalyze, bool *wraparound);
+static AutoVacType recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts,
+													 Form_pg_class classForm,
+													 int effective_multixact_freeze_max_age,
+													 bool *dovacuum, bool *doanalyze,
+													 bool *wraparound);
+static AutoVacType relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
+											 Form_pg_class classForm,
+											 PgStat_StatTabEntry *tabentry,
+											 int effective_multixact_freeze_max_age,
+											 bool *dovacuum, bool *doanalyze,
+											 bool *wraparound);
 
 static void autovacuum_do_vac_analyze(autovac_table *tab,
 									  BufferAccessStrategy bstrategy);
@@ -1626,7 +1628,7 @@ AutoVacWorkerMain(int argc, char *argv[])
 	/*
 	 * Force synchronous replication off to allow regular maintenance even if
 	 * we are waiting for standbys to connect. This is important to ensure we
-	 * aren't blocked from performing anti-wraparound tasks.
+	 * aren't blocked from performing table age tasks.
 	 */
 	if (synchronous_commit > SYNCHRONOUS_COMMIT_LOCAL_FLUSH)
 		SetConfigOption("synchronous_commit", "local",
@@ -2767,6 +2769,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 	autovac_table *tab = NULL;
 	bool		wraparound;
 	AutoVacOpts *avopts;
+	AutoVacType trigger;
 
 	/* fetch the relation's relcache entry */
 	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
@@ -2790,9 +2793,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 			avopts = &hentry->ar_reloptions;
 	}
 
-	recheck_relation_needs_vacanalyze(relid, avopts, classForm,
-									  effective_multixact_freeze_max_age,
-									  &dovacuum, &doanalyze, &wraparound);
+	trigger = recheck_relation_needs_vacanalyze(relid, avopts, classForm,
+												effective_multixact_freeze_max_age,
+												&dovacuum, &doanalyze,
+												&wraparound);
 
 	/* OK, it needs something done */
 	if (doanalyze || dovacuum)
@@ -2873,6 +2877,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 		tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;
 		tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age;
 		tab->at_params.is_wraparound = wraparound;
+		tab->at_params.trigger = trigger;
 		tab->at_params.log_min_duration = log_min_duration;
 		tab->at_vacuum_cost_limit = vac_cost_limit;
 		tab->at_vacuum_cost_delay = vac_cost_delay;
@@ -2901,7 +2906,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
  * Fetch the pgstat of a relation and recheck whether a relation
  * needs to be vacuumed or analyzed.
  */
-static void
+static AutoVacType
 recheck_relation_needs_vacanalyze(Oid relid,
 								  AutoVacOpts *avopts,
 								  Form_pg_class classForm,
@@ -2911,18 +2916,21 @@ recheck_relation_needs_vacanalyze(Oid relid,
 								  bool *wraparound)
 {
 	PgStat_StatTabEntry *tabentry;
+	AutoVacType trigger;
 
 	/* fetch the pgstat table entry */
 	tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared,
 											  relid);
 
-	relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
-							  effective_multixact_freeze_max_age,
-							  dovacuum, doanalyze, wraparound);
+	trigger = relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
+										effective_multixact_freeze_max_age,
+										dovacuum, doanalyze, wraparound);
 
 	/* ignore ANALYZE for toast tables */
 	if (classForm->relkind == RELKIND_TOASTVALUE)
 		*doanalyze = false;
+
+	return trigger;
 }
 
 /*
@@ -2948,8 +2956,8 @@ recheck_relation_needs_vacanalyze(Oid relid,
  * the number of tuples (both live and dead) that there were as of the last
  * analyze.  This is asymmetric to the VACUUM case.
  *
- * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
- * transactions back, and if its relminmxid is more than
+ * We also force a table age vacuum if the table's relfrozenxid is more than
+ * freeze_max_age transactions back, or if its relminmxid is more than
  * multixact_freeze_max_age multixacts back.
  *
  * A table whose autovacuum_enabled option is false is
@@ -2961,8 +2969,15 @@ recheck_relation_needs_vacanalyze(Oid relid,
  * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
  * value < 0 is substituted with the value of
  * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
+ *
+ * Return value is the condition that triggered autovacuum to run VACUUM
+ * (useful only when *dovacuum is set).  There can only be exactly one
+ * triggering condition, even when multiple thresholds happened to be crossed
+ * at the same time.  We prefer to return "table age" in the event of such a
+ * conflict, after which we prefer to report "dead tuples" as the criterion,
+ * with "inserted tuples" placed last.
  */
-static void
+static AutoVacType
 relation_needs_vacanalyze(Oid relid,
 						  AutoVacOpts *relopts,
 						  Form_pg_class classForm,
@@ -2973,6 +2988,9 @@ relation_needs_vacanalyze(Oid relid,
 						  bool *doanalyze,
 						  bool *wraparound)
 {
+	TransactionId relfrozenxid = classForm->relfrozenxid;
+	MultiXactId relminmxid = classForm->relminmxid;
+	AutoVacType trigger = AUTOVACUUM_DEAD_TUPLES;
 	bool		force_vacuum;
 	bool		av_enabled;
 	float4		reltuples;		/* pg_class.reltuples */
@@ -3046,40 +3064,72 @@ relation_needs_vacanalyze(Oid relid,
 
 	av_enabled = (relopts ? relopts->enabled : true);
 
-	/* Force vacuum if table is at risk of wraparound */
+	/* Force vacuum if table age exceeds cutoff */
 	xidForceLimit = recentXid - freeze_max_age;
 	if (xidForceLimit < FirstNormalTransactionId)
 		xidForceLimit -= FirstNormalTransactionId;
-	force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
-					TransactionIdPrecedes(classForm->relfrozenxid,
-										  xidForceLimit));
-	if (!force_vacuum)
+	multiForceLimit = recentMulti - multixact_freeze_max_age;
+	if (multiForceLimit < FirstMultiXactId)
+		multiForceLimit -= FirstMultiXactId;
+	force_vacuum = ((TransactionIdIsNormal(relfrozenxid) &&
+					 TransactionIdPrecedes(relfrozenxid, xidForceLimit)) ||
+					(MultiXactIdIsValid(relminmxid) &&
+					 MultiXactIdPrecedes(relminmxid, multiForceLimit)));
+
+	/*
+	 * If we're forcing table age autovacuum, are we at the point where it has
+	 * to be an antiwraparound autovacuum?
+	 *
+	 * An antiwraparound autovacuum is the same as a standard forced/table age
+	 * autovacuum as far as vacuumlazy.c is concerned.  Antiwraparound is an
+	 * emergency mechanism used when everything else (including standard table
+	 * age autovacuums) failed to advance relfrozenxid/relminmxid before now.
+	 * Antiwraparound autovacuums are different to other autovacuums in that
+	 * they cannot be automatically canceled, and are advertised as a distinct
+	 * kind of VACUUM operation in pg_stat_activity.
+	 */
+	*wraparound = false;		/* for now */
+	if (force_vacuum)
 	{
+		trigger = AUTOVACUUM_TABLEAGE;
+
+		freeze_max_age = Min(freeze_max_age * 2, vacuum_failsafe_age / 2);
+		multixact_freeze_max_age = Min(multixact_freeze_max_age * 2,
+									   vacuum_multixact_failsafe_age / 2);
+
+		xidForceLimit = recentXid - freeze_max_age;
+		if (xidForceLimit < FirstNormalTransactionId)
+			xidForceLimit -= FirstNormalTransactionId;
 		multiForceLimit = recentMulti - multixact_freeze_max_age;
 		if (multiForceLimit < FirstMultiXactId)
 			multiForceLimit -= FirstMultiXactId;
-		force_vacuum = MultiXactIdIsValid(classForm->relminmxid) &&
-			MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit);
+
+		*wraparound = ((TransactionIdIsNormal(relfrozenxid) &&
+						TransactionIdPrecedes(relfrozenxid, xidForceLimit)) ||
+					   (MultiXactIdIsValid(relminmxid) &&
+						MultiXactIdPrecedes(relminmxid, multiForceLimit)));
 	}
-	*wraparound = force_vacuum;
 
 	/* User disabled it in pg_class.reloptions?  (But ignore if at risk) */
 	if (!av_enabled && !force_vacuum)
 	{
 		*doanalyze = false;
 		*dovacuum = false;
-		return;
+		return trigger;
 	}
 
 	/*
 	 * If we found stats for the table, and autovacuum is currently enabled,
 	 * make a threshold-based decision whether to vacuum and/or analyze.  If
-	 * autovacuum is currently disabled, we must be here for anti-wraparound
+	 * autovacuum is currently disabled, we must be here for forced table age
 	 * vacuuming only, so don't vacuum (or analyze) anything that's not being
 	 * forced.
 	 */
 	if (PointerIsValid(tabentry) && AutoVacuumingActive())
 	{
+		bool		is_deadtuple,
+					is_insert;
+
 		reltuples = classForm->reltuples;
 		vactuples = tabentry->n_dead_tuples;
 		instuples = tabentry->inserts_since_vacuum;
@@ -3108,15 +3158,20 @@ relation_needs_vacanalyze(Oid relid,
 				 vactuples, vacthresh, anltuples, anlthresh);
 
 		/* Determine if this table needs vacuum or analyze. */
-		*dovacuum = force_vacuum || (vactuples > vacthresh) ||
-			(vac_ins_base_thresh >= 0 && instuples > vacinsthresh);
+		is_deadtuple = (vactuples > vacthresh);
+		is_insert = (vac_ins_base_thresh >= 0 && instuples > vacinsthresh);
+		*dovacuum = (force_vacuum || is_deadtuple || is_insert);
 		*doanalyze = (anltuples > anlthresh);
+
+		/* See header comments about return value */
+		if (!force_vacuum && !is_deadtuple && is_insert)
+			trigger = AUTOVACUUM_INSERTED_TUPLES;
 	}
 	else
 	{
 		/*
 		 * Skip a table not found in stat hash, unless we have to force vacuum
-		 * for anti-wrap purposes.  If it's not acted upon, there's no need to
+		 * for table age purposes.  If it's not acted upon, there's no need to
 		 * vacuum it.
 		 */
 		*dovacuum = force_vacuum;
@@ -3126,6 +3181,8 @@ relation_needs_vacanalyze(Oid relid,
 	/* ANALYZE refuses to work with pg_statistic */
 	if (relid == StatisticRelationId)
 		*doanalyze = false;
+
+	return trigger;
 }
 
 /*
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b1c35653f..2a5498227 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1384,8 +1384,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 			LWLockRelease(ProcArrayLock);
 
 			/*
-			 * Only do it if the worker is not working to protect against Xid
-			 * wraparound.
+			 * Only do it if the worker is not an antiwraparound autovacuum, a
+			 * special type of autovacuum that is only used in emergencies.
 			 */
 			if ((statusFlags & PROC_IS_AUTOVACUUM) &&
 				!(statusFlags & PROC_VACUUM_FOR_WRAPAROUND))
-- 
2.34.1

