On Mon, 2020-03-02 at 14:57 +0100, I wrote:
> But I think it would be good to have *something* that addresses the immediate
> problem (INSERT-only tables are autovacuumed too late), as long as
> that does not have negative side-effects or blocks further improvements.
> 
> I don't feel totally well with the very simplistic approach of this
> patch (use the same metric to trigger autoanalyze and autovacuum),
> but what about this:
> 
> - a new table storage option autovacuum_vacuum_insert_threshold,
>   perhaps a GUC of the same name, by default deactivated.
> 
> - if tabentry->tuples_inserted exceeds this threshold, but not one
>   of the others, lauch autovacuum with index_cleanup=off.

As a more substantial base for discussion, here is a patch that:

- introduces a GUC and reloption "autovacuum_vacuum_insert_limit",
  default 10000000

- introduces a statistics counter "inserts_since_vacuum" per table
  that gets reset to 0 after vacuum

- causes autovacuum to run without cleaning up indexes if
  inserts_since_vacuum > autovacuum_vacuum_insert_limit
  and there is no other reason for an autovacuum

No doc patch is included yet, and perhaps the new counter should
be shown in "pg_stat_user_tables".

Yours,
Laurenz Albe
From fee7443b4b8d965c77f90a631f1245217624bd44 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Tue, 3 Mar 2020 16:21:01 +0100
Subject: [PATCH] Autovacuum tables that have received only inserts

Add "autovacuum_vacuum_insert_limit" GUC and reloption,
the default value is 10000000.
Any table that has received more inserts since it was
last vacuumed (and that is not vacuumed for another
reason) will be autovacuumed with "index_cleanup off".

This avoids the known problem that insert-only tables
are never autovacuumed until they need to have their
anti-wraparound autovacuum, which then can be massive
and disruptive.

To track the number of inserts since the last vacuum,
introduce a StatTabEntry "inserts_since_vacuum" that
gets reset to 0 after a vacuum.
---
 src/backend/access/common/reloptions.c        | 11 ++++++
 src/backend/postmaster/autovacuum.c           | 39 ++++++++++++++++---
 src/backend/postmaster/pgstat.c               |  5 +++
 src/backend/utils/misc/guc.c                  |  9 +++++
 src/backend/utils/misc/postgresql.conf.sample |  2 +
 src/bin/psql/tab-complete.c                   |  2 +
 src/include/pgstat.h                          |  1 +
 src/include/postmaster/autovacuum.h           |  1 +
 src/include/utils/rel.h                       |  1 +
 9 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 5325dd3f61..9c3d8b174e 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -233,6 +233,15 @@ static relopt_int intRelOpts[] =
 		},
 		-1, 0, INT_MAX
 	},
+	{
+		{
+			"autovacuum_vacuum_insert_limit",
+			"Maximum number of tuple inserts prior to vacuum",
+			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+			ShareUpdateExclusiveLock
+		},
+		-1, 0, INT_MAX
+	},
 	{
 		{
 			"autovacuum_analyze_threshold",
@@ -1501,6 +1510,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, enabled)},
 		{"autovacuum_vacuum_threshold", RELOPT_TYPE_INT,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_threshold)},
+		{"autovacuum_vacuum_insert_limit", RELOPT_TYPE_INT,
+		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_ins_limit)},
 		{"autovacuum_analyze_threshold", RELOPT_TYPE_INT,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)},
 		{"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT,
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index e3a43d3296..516a8103c3 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -116,6 +116,7 @@ int			autovacuum_max_workers;
 int			autovacuum_work_mem = -1;
 int			autovacuum_naptime;
 int			autovacuum_vac_thresh;
+int			autovacuum_vac_ins_limit;
 double		autovacuum_vac_scale;
 int			autovacuum_anl_thresh;
 double		autovacuum_anl_scale;
@@ -330,7 +331,8 @@ static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
 									  Form_pg_class classForm,
 									  PgStat_StatTabEntry *tabentry,
 									  int effective_multixact_freeze_max_age,
-									  bool *dovacuum, bool *doanalyze, bool *wraparound);
+									  bool *dovacuum, bool *doanalyze, bool *wraparound,
+									  bool *skip_indexes);
 
 static void autovacuum_do_vac_analyze(autovac_table *tab,
 									  BufferAccessStrategy bstrategy);
@@ -2056,6 +2058,7 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		bool		skip_indexes;
 
 		if (classForm->relkind != RELKIND_RELATION &&
 			classForm->relkind != RELKIND_MATVIEW)
@@ -2096,7 +2099,8 @@ do_autovacuum(void)
 		/* Check if it needs vacuum or analyze */
 		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
 								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
+								  &dovacuum, &doanalyze, &wraparound,
+								  &skip_indexes);
 
 		/* Relations that need work are added to table_oids */
 		if (dovacuum || doanalyze)
@@ -2149,6 +2153,7 @@ do_autovacuum(void)
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
+		bool		skip_indexes;
 
 		/*
 		 * We cannot safely process other backends' temp tables, so skip 'em.
@@ -2179,7 +2184,8 @@ do_autovacuum(void)
 
 		relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
 								  effective_multixact_freeze_max_age,
-								  &dovacuum, &doanalyze, &wraparound);
+								  &dovacuum, &doanalyze, &wraparound,
+								  &skip_indexes);
 
 		/* ignore analyze for toast tables */
 		if (dovacuum)
@@ -2780,6 +2786,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 	PgStat_StatDBEntry *shared;
 	PgStat_StatDBEntry *dbentry;
 	bool		wraparound;
+	bool		skip_indexes;
 	AutoVacOpts *avopts;
 
 	/* use fresh stats */
@@ -2816,7 +2823,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 
 	relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
 							  effective_multixact_freeze_max_age,
-							  &dovacuum, &doanalyze, &wraparound);
+							  &dovacuum, &doanalyze, &wraparound,
+							  &skip_indexes);
 
 	/* ignore ANALYZE for toast tables */
 	if (classForm->relkind == RELKIND_TOASTVALUE)
@@ -2885,7 +2893,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 			(dovacuum ? VACOPT_VACUUM : 0) |
 			(doanalyze ? VACOPT_ANALYZE : 0) |
 			(!wraparound ? VACOPT_SKIP_LOCKED : 0);
-		tab->at_params.index_cleanup = VACOPT_TERNARY_DEFAULT;
+		tab->at_params.index_cleanup = skip_indexes ? VACOPT_TERNARY_DISABLED : VACOPT_TERNARY_DEFAULT;
 		tab->at_params.truncate = VACOPT_TERNARY_DEFAULT;
 		/* As of now, we don't support parallel vacuum for autovacuum */
 		tab->at_params.nworkers = -1;
@@ -2961,7 +2969,8 @@ relation_needs_vacanalyze(Oid relid,
  /* output params below */
 						  bool *dovacuum,
 						  bool *doanalyze,
-						  bool *wraparound)
+						  bool *wraparound,
+						  bool *skip_indexes)
 {
 	bool		force_vacuum;
 	bool		av_enabled;
@@ -2969,6 +2978,7 @@ relation_needs_vacanalyze(Oid relid,
 
 	/* constants from reloptions or GUC variables */
 	int			vac_base_thresh,
+				vac_ins_limit,
 				anl_base_thresh;
 	float4		vac_scale_factor,
 				anl_scale_factor;
@@ -3005,6 +3015,10 @@ relation_needs_vacanalyze(Oid relid,
 		? relopts->vacuum_threshold
 		: autovacuum_vac_thresh;
 
+	vac_ins_limit = (relopts && relopts->vacuum_ins_limit >= 0)
+		? relopts->vacuum_ins_limit
+		: autovacuum_vac_ins_limit;
+
 	anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
 		? relopts->analyze_scale_factor
 		: autovacuum_anl_scale;
@@ -3023,6 +3037,9 @@ relation_needs_vacanalyze(Oid relid,
 
 	av_enabled = (relopts ? relopts->enabled : true);
 
+	/* process indexes by default */
+	*skip_indexes = false;
+
 	/* Force vacuum if table is at risk of wraparound */
 	xidForceLimit = recentXid - freeze_max_age;
 	if (xidForceLimit < FirstNormalTransactionId)
@@ -3076,6 +3093,16 @@ relation_needs_vacanalyze(Oid relid,
 		/* Determine if this table needs vacuum or analyze. */
 		*dovacuum = force_vacuum || (vactuples > vacthresh);
 		*doanalyze = (anltuples > anlthresh);
+		/*
+		 * If the number of inserted tuples exceeds the limit
+		 * and no vacuum is necessary for other reasons,
+		 * vacuum without cleaning up indexes.
+		 */
+		if (!(*dovacuum) && vac_ins_limit < tabentry->inserts_since_vacuum)
+		{
+			*dovacuum = true;
+			*skip_indexes = true;
+		}
 	}
 	else
 	{
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 462b4d7e06..78bd5cf90d 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4791,6 +4791,7 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
 		result->n_live_tuples = 0;
 		result->n_dead_tuples = 0;
 		result->changes_since_analyze = 0;
+		result->inserts_since_vacuum = 0;
 		result->blocks_fetched = 0;
 		result->blocks_hit = 0;
 		result->vacuum_timestamp = 0;
@@ -5921,6 +5922,7 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 			tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
 			tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
 			tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
+			tabentry->inserts_since_vacuum = tabmsg->t_counts.t_tuples_inserted;
 			tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
 			tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
 
@@ -5950,10 +5952,12 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 			{
 				tabentry->n_live_tuples = 0;
 				tabentry->n_dead_tuples = 0;
+				tabentry->inserts_since_vacuum = 0;
 			}
 			tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
 			tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
 			tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
+			tabentry->inserts_since_vacuum += tabmsg->t_counts.t_tuples_inserted;
 			tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
 			tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
 		}
@@ -6187,6 +6191,7 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
 
 	tabentry->n_live_tuples = msg->m_live_tuples;
 	tabentry->n_dead_tuples = msg->m_dead_tuples;
+	tabentry->inserts_since_vacuum = 0;
 
 	if (msg->m_autovacuum)
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 464f264d9a..f16ad5b4a0 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3063,6 +3063,15 @@ static struct config_int ConfigureNamesInt[] =
 		50, 0, INT_MAX,
 		NULL, NULL, NULL
 	},
+	{
+		{"autovacuum_vacuum_insert_limit", PGC_SIGHUP, AUTOVACUUM,
+			gettext_noop("Maximum number of tuple inserts prior to vacuum."),
+			NULL
+		},
+		&autovacuum_vac_ins_limit,
+		10000000, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
 	{
 		{"autovacuum_analyze_threshold", PGC_SIGHUP, AUTOVACUUM,
 			gettext_noop("Minimum number of tuple inserts, updates, or deletes prior to analyze."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e58e4788a8..9d96d58ed2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -598,6 +598,8 @@
 #autovacuum_naptime = 1min		# time between autovacuum runs
 #autovacuum_vacuum_threshold = 50	# min number of row updates before
 					# vacuum
+#autovacuum_vacuum_insert_limit = 50	# max number of row inserts before
+					# vacuum
 #autovacuum_analyze_threshold = 50	# min number of row updates before
 					# analyze
 #autovacuum_vacuum_scale_factor = 0.2	# fraction of table size before vacuum
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index b6b08d0ccb..badba9f26f 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1071,6 +1071,7 @@ static const char *const table_storage_parameters[] = {
 	"autovacuum_multixact_freeze_table_age",
 	"autovacuum_vacuum_cost_delay",
 	"autovacuum_vacuum_cost_limit",
+	"autovacuum_vacuum_insert_limit",
 	"autovacuum_vacuum_scale_factor",
 	"autovacuum_vacuum_threshold",
 	"fillfactor",
@@ -1085,6 +1086,7 @@ static const char *const table_storage_parameters[] = {
 	"toast.autovacuum_multixact_freeze_table_age",
 	"toast.autovacuum_vacuum_cost_delay",
 	"toast.autovacuum_vacuum_cost_limit",
+	"toast.autovacuum_vacuum_insert_limit",
 	"toast.autovacuum_vacuum_scale_factor",
 	"toast.autovacuum_vacuum_threshold",
 	"toast.log_autovacuum_min_duration",
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3a65a51696..7ccbb3191e 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -646,6 +646,7 @@ typedef struct PgStat_StatTabEntry
 	PgStat_Counter n_live_tuples;
 	PgStat_Counter n_dead_tuples;
 	PgStat_Counter changes_since_analyze;
+	PgStat_Counter inserts_since_vacuum;
 
 	PgStat_Counter blocks_fetched;
 	PgStat_Counter blocks_hit;
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index d40ed55531..5979bddfa8 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -32,6 +32,7 @@ extern int	autovacuum_max_workers;
 extern int	autovacuum_work_mem;
 extern int	autovacuum_naptime;
 extern int	autovacuum_vac_thresh;
+extern int	autovacuum_vac_ins_limit;
 extern double autovacuum_vac_scale;
 extern int	autovacuum_anl_thresh;
 extern double autovacuum_anl_scale;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 44ed04dd3f..f13e64f731 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -252,6 +252,7 @@ typedef struct AutoVacOpts
 {
 	bool		enabled;
 	int			vacuum_threshold;
+	int			vacuum_ins_limit;
 	int			analyze_threshold;
 	int			vacuum_cost_limit;
 	int			freeze_min_age;
-- 
2.21.1

Reply via email to