I frequently hear about scenarios where users with thousands upon thousands of tables realize that autovacuum is struggling to keep up. When they inevitably go to bump up autovacuum_max_workers, they discover that it requires a server restart (i.e., downtime) to take effect, causing further frustration. For this reason, I think $SUBJECT is a desirable improvement. I spent some time looking for past discussions about this, and I was surprised to not find any, so I thought I'd give it a try.
The attached proof-of-concept patch demonstrates what I have in mind. Instead of trying to dynamically change the global process table, etc., I'm proposing that we introduce a new GUC that sets the effective maximum number of autovacuum workers that can be started at any time. This means there would be two GUCs for the number of autovacuum workers: one for the number of slots reserved for autovacuum workers, and another that restricts the number of those slots that can be used. The former would continue to require a restart to change its value, and users would typically want to set it relatively high. The latter could be changed at any time and would allow for raising or lowering the maximum number of active autovacuum workers, up to the limit set by the other parameter. The proof-of-concept patch keeps autovacuum_max_workers as the maximum number of slots to reserve for workers, but I think we should instead rename this parameter to something else and then reintroduce autovacuum_max_workers as the new parameter that can be adjusted without restarting. That way, autovacuum_max_workers continues to work much the same way as in previous versions. There are a couple of weird cases with this approach. One is when the restart-only limit is set lower than the PGC_SIGHUP limit. In that case, I think we should just use the restart-only limit. The other is when there are already N active autovacuum workers and the PGC_SIGHUP parameter is changed to something less than N. For that case, I think we should just block starting additional workers until the number of workers drops below the new parameter's value. I don't think we should kill existing workers, or anything else like that. TBH I've been sitting on this idea for a while now, only because I think it has a slim chance of acceptance, but IMHO this is a simple change that could help many users. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 170b973cc5..e65ddd67c1 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -114,6 +114,7 @@
  * GUC parameters
  */
 bool		autovacuum_start_daemon = false;
+int			autovacuum_workers;
 int			autovacuum_max_workers;
 int			autovacuum_work_mem = -1;
 int			autovacuum_naptime;
@@ -289,7 +290,7 @@ typedef struct
 {
 	sig_atomic_t av_signal[AutoVacNumSignals];
 	pid_t		av_launcherpid;
-	dlist_head	av_freeWorkers;
+	dclist_head av_freeWorkers;
 	dlist_head	av_runningWorkers;
 	WorkerInfo	av_startingWorker;
 	AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
@@ -347,6 +348,7 @@
 static void autovac_report_activity(autovac_table *tab);
 static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
 									const char *nspname, const char *relname);
 static void avl_sigusr2_handler(SIGNAL_ARGS);
+static bool autovac_slot_available(void);
@@ -575,8 +577,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 		 * wakening conditions.
 		 */
-		launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
-								 false, &nap);
+		launcher_determine_sleep(autovac_slot_available(), false, &nap);
 
 		/*
 		 * Wait until naptime expires or we get some type of signal (all the
@@ -636,7 +637,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 		current_time = GetCurrentTimestamp();
 
 		LWLockAcquire(AutovacuumLock, LW_SHARED);
-		can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
+		can_launch = autovac_slot_available();
 
 		if (AutoVacuumShmem->av_startingWorker != NULL)
 		{
@@ -679,8 +680,8 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 				worker->wi_sharedrel = false;
 				worker->wi_proc = NULL;
 				worker->wi_launchtime = 0;
-				dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-								&worker->wi_links);
+				dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+								 &worker->wi_links);
 				AutoVacuumShmem->av_startingWorker = NULL;
 				ereport(WARNING,
 						errmsg("autovacuum worker took too long to start; canceled"));
@@ -1087,7 +1088,7 @@ do_start_worker(void)
 	/* return quickly when there are no free workers */
 	LWLockAcquire(AutovacuumLock, LW_SHARED);
-	if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
+	if (!autovac_slot_available())
 	{
 		LWLockRelease(AutovacuumLock);
 		return InvalidOid;
 	}
@@ -1240,7 +1241,7 @@ do_start_worker(void)
 		 * Get a worker entry from the freelist.  We checked above, so there
 		 * really should be a free slot.
 		 */
-		wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
+		wptr = dclist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
 
 		worker = dlist_container(WorkerInfoData, wi_links, wptr);
 		worker->wi_dboid = avdb->adw_datid;
@@ -1609,8 +1610,8 @@ FreeWorkerInfo(int code, Datum arg)
 		MyWorkerInfo->wi_proc = NULL;
 		MyWorkerInfo->wi_launchtime = 0;
 		pg_atomic_clear_flag(&MyWorkerInfo->wi_dobalance);
-		dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-						&MyWorkerInfo->wi_links);
+		dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+						 &MyWorkerInfo->wi_links);
 
 		/* not mine anymore */
 		MyWorkerInfo = NULL;
@@ -3292,7 +3293,7 @@ AutoVacuumShmemInit(void)
 		Assert(!found);
 
 		AutoVacuumShmem->av_launcherpid = 0;
-		dlist_init(&AutoVacuumShmem->av_freeWorkers);
+		dclist_init(&AutoVacuumShmem->av_freeWorkers);
 		dlist_init(&AutoVacuumShmem->av_runningWorkers);
 		AutoVacuumShmem->av_startingWorker = NULL;
 		memset(AutoVacuumShmem->av_workItems, 0,
@@ -3304,8 +3305,8 @@ AutoVacuumShmemInit(void)
 		/* initialize the WorkerInfo free list */
 		for (i = 0; i < autovacuum_max_workers; i++)
 		{
-			dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-							&worker[i].wi_links);
+			dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+							 &worker[i].wi_links);
 			pg_atomic_init_flag(&worker[i].wi_dobalance);
 		}
@@ -3341,3 +3342,12 @@ check_autovacuum_work_mem(int *newval, void **extra, GucSource source)
 
 	return true;
 }
+
+static bool
+autovac_slot_available(void)
+{
+	const dclist_head *freelist = &AutoVacuumShmem->av_freeWorkers;
+	int			reserved_slots = autovacuum_max_workers - autovacuum_workers;
+
+	return dclist_count(freelist) > Max(0, reserved_slots);
+}
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index c68fdc008b..29eb22939a 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3400,14 +3400,23 @@ struct config_int ConfigureNamesInt[] =
 		400000000, 10000, 2000000000,
 		NULL, NULL, NULL
 	},
+	{
+		{"autovacuum_workers", PGC_SIGHUP, AUTOVACUUM,
+			gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."),
+			NULL
+		},
+		&autovacuum_workers,
+		3, 1, MAX_BACKENDS,
+		NULL, NULL, NULL
+	},
 	{
 		/* see max_connections */
 		{"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM,
-			gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."),
+			gettext_noop("Sets the maximum effective value of autovacuum_workers."),
 			NULL
 		},
 		&autovacuum_max_workers,
-		3, 1, MAX_BACKENDS,
+		16, 1, MAX_BACKENDS,
 		check_autovacuum_max_workers, NULL, NULL
 	},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2166ea4a87..f5bc403041 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -658,7 +658,8 @@
 
 #autovacuum = on			# Enable autovacuum subprocess?  'on'
 					# requires track_counts to also be on.
-#autovacuum_max_workers = 3		# max number of autovacuum subprocesses
+#autovacuum_workers = 3			# max number of autovacuum subprocesses
+#autovacuum_max_workers = 16		# effective limit for autovacuum_workers
 					# (change requires restart)
 #autovacuum_naptime = 1min		# time between autovacuum runs
 #autovacuum_vacuum_threshold = 50	# min number of row updates before
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index cae1e8b329..fb2936c161 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -28,6 +28,7 @@ typedef enum
 
 /* GUC variables */
 extern PGDLLIMPORT bool autovacuum_start_daemon;
+extern PGDLLIMPORT int autovacuum_workers;
 extern PGDLLIMPORT int autovacuum_max_workers;
 extern PGDLLIMPORT int autovacuum_work_mem;
 extern PGDLLIMPORT int autovacuum_naptime;
diff --git a/src/test/modules/xid_wraparound/t/001_emergency_vacuum.pl b/src/test/modules/xid_wraparound/t/001_emergency_vacuum.pl
index 37550b67a4..fb20c9084c 100644
--- a/src/test/modules/xid_wraparound/t/001_emergency_vacuum.pl
+++ b/src/test/modules/xid_wraparound/t/001_emergency_vacuum.pl
@@ -21,7 +21,7 @@ $node->append_conf(
 autovacuum = off # run autovacuum only when to anti wraparound
 autovacuum_naptime = 1s # so it's easier to verify the order of operations
-autovacuum_max_workers = 1
+autovacuum_workers = 1
 log_autovacuum_min_duration = 0
 ]);
 $node->start;
diff --git a/src/test/modules/xid_wraparound/t/003_wraparounds.pl b/src/test/modules/xid_wraparound/t/003_wraparounds.pl
index 88063b4b52..35f2e1029a 100644
--- a/src/test/modules/xid_wraparound/t/003_wraparounds.pl
+++ b/src/test/modules/xid_wraparound/t/003_wraparounds.pl
@@ -24,7 +24,7 @@ $node->append_conf(
 autovacuum = off # run autovacuum only when to anti wraparound
 autovacuum_naptime = 1s # so it's easier to verify the order of operations
-autovacuum_max_workers = 1
+autovacuum_workers = 1
 log_autovacuum_min_duration = 0
 ]);
 $node->start;