Here is a rebased patch. One thing that still bugs me is that there is no feedback sent to the user when autovacuum_max_workers is set higher than autovacuum_worker_slots. I think we should at least emit a WARNING, perhaps from the autovacuum launcher, i.e., once when the launcher starts and then again as needed via HandleAutoVacLauncherInterrupts(). Or we could fail to start in PostmasterMain() and then ignore later misconfigurations via a GUC check hook. I'm not too thrilled about adding more GUC check hooks that depend on the value of other GUCs, but I do like the idea of failing instead of silently proceeding with a different value than the user configured. Any thoughts?
-- nathan
From bd486d1ab302c4654b9cfbc57230bcf9b140711e Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Sat, 22 Jun 2024 15:05:44 -0500 Subject: [PATCH v7 1/1] allow changing autovacuum_max_workers without restarting --- doc/src/sgml/config.sgml | 28 +++++++++++- doc/src/sgml/runtime.sgml | 12 ++--- src/backend/access/transam/xlog.c | 2 +- src/backend/postmaster/autovacuum.c | 44 ++++++++++++------- src/backend/postmaster/postmaster.c | 2 +- src/backend/storage/lmgr/proc.c | 6 +-- src/backend/utils/init/postinit.c | 6 +-- src/backend/utils/misc/guc_tables.c | 11 ++++- src/backend/utils/misc/postgresql.conf.sample | 3 +- src/include/postmaster/autovacuum.h | 1 + src/test/perl/PostgreSQL/Test/Cluster.pm | 1 + 11 files changed, 83 insertions(+), 33 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f627a3e63c..da30d1ea4f 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8544,6 +8544,25 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-autovacuum-worker-slots" xreflabel="autovacuum_worker_slots"> + <term><varname>autovacuum_worker_slots</varname> (<type>integer</type>) + <indexterm> + <primary><varname>autovacuum_worker_slots</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the number of backend slots to reserve for autovacuum worker + processes. The default is 16. This parameter can only be set at server + start. + </para> + <para> + When changing this value, consider also adjusting + <xref linkend="guc-autovacuum-max-workers"/>. 
+ </para> + </listitem> + </varlistentry> + <varlistentry id="guc-autovacuum-max-workers" xreflabel="autovacuum_max_workers"> <term><varname>autovacuum_max_workers</varname> (<type>integer</type>) <indexterm> @@ -8554,7 +8573,14 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; <para> Specifies the maximum number of autovacuum processes (other than the autovacuum launcher) that may be running at any one time. The default - is three. This parameter can only be set at server start. + is three. This parameter can only be set in the + <filename>postgresql.conf</filename> file or on the server command line. + </para> + <para> + Note that a setting for this value which is higher than + <xref linkend="guc-autovacuum-worker-slots"/> will have no effect, + since autovacuum workers are taken from the pool of slots established + by that setting. </para> </listitem> </varlistentry> diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 2f7c618886..4bb37faffe 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -781,13 +781,13 @@ psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such <row> <entry><varname>SEMMNI</varname></entry> <entry>Maximum number of semaphore identifiers (i.e., sets)</entry> - <entry>at least <literal>ceil((max_connections + autovacuum_max_workers + max_wal_senders + max_worker_processes + 7) / 16)</literal> plus room for other applications</entry> + <entry>at least <literal>ceil((max_connections + autovacuum_worker_slots + max_wal_senders + max_worker_processes + 7) / 16)</literal> plus room for other applications</entry> </row> <row> <entry><varname>SEMMNS</varname></entry> <entry>Maximum number of semaphores system-wide</entry> - <entry><literal>ceil((max_connections + autovacuum_max_workers + max_wal_senders + max_worker_processes + 7) / 16) * 17</literal> plus room for other applications</entry> + <entry><literal>ceil((max_connections + autovacuum_worker_slots + 
max_wal_senders + max_worker_processes + 7) / 16) * 17</literal> plus room for other applications</entry> </row> <row> @@ -838,7 +838,7 @@ psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such When using System V semaphores, <productname>PostgreSQL</productname> uses one semaphore per allowed connection (<xref linkend="guc-max-connections"/>), allowed autovacuum worker process - (<xref linkend="guc-autovacuum-max-workers"/>), allowed WAL sender process + (<xref linkend="guc-autovacuum-worker-slots"/>), allowed WAL sender process (<xref linkend="guc-max-wal-senders"/>), and allowed background process (<xref linkend="guc-max-worker-processes"/>), in sets of 16. Each such set will @@ -847,13 +847,13 @@ psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such other applications. The maximum number of semaphores in the system is set by <varname>SEMMNS</varname>, which consequently must be at least as high as <varname>max_connections</varname> plus - <varname>autovacuum_max_workers</varname> plus <varname>max_wal_senders</varname>, + <varname>autovacuum_worker_slots</varname> plus <varname>max_wal_senders</varname>, plus <varname>max_worker_processes</varname>, plus one extra for each 16 allowed connections plus workers (see the formula in <xref linkend="sysvipc-parameters"/>). The parameter <varname>SEMMNI</varname> determines the limit on the number of semaphore sets that can exist on the system at one time. Hence this parameter must be at - least <literal>ceil((max_connections + autovacuum_max_workers + max_wal_senders + max_worker_processes + 7) / 16)</literal>. + least <literal>ceil((max_connections + autovacuum_worker_slots + max_wal_senders + max_worker_processes + 7) / 16)</literal>. 
Lowering the number of allowed connections is a temporary workaround for failures, which are usually confusingly worded <quote>No space @@ -884,7 +884,7 @@ psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such When using POSIX semaphores, the number of semaphores needed is the same as for System V, that is one semaphore per allowed connection (<xref linkend="guc-max-connections"/>), allowed autovacuum worker process - (<xref linkend="guc-autovacuum-max-workers"/>), allowed WAL sender process + (<xref linkend="guc-autovacuum-worker-slots"/>), allowed WAL sender process (<xref linkend="guc-max-wal-senders"/>), and allowed background process (<xref linkend="guc-max-worker-processes"/>). On the platforms where this option is preferred, there is no specific diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 33e27a6e72..816f9f2b4b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5362,7 +5362,7 @@ CheckRequiredParameterValues(void) */ if (ArchiveRecoveryRequested && EnableHotStandby) { - /* We ignore autovacuum_max_workers when we make this test. */ + /* We ignore autovacuum_worker_slots when we make this test. */ RecoveryRequiresIntParameter("max_connections", MaxConnections, ControlFile->MaxConnections); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 928754b51c..565e14ca9b 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -114,6 +114,7 @@ * GUC parameters */ bool autovacuum_start_daemon = false; +int autovacuum_worker_slots; int autovacuum_max_workers; int autovacuum_work_mem = -1; int autovacuum_naptime; @@ -209,7 +210,7 @@ typedef struct autovac_table /*------------- * This struct holds information about a single worker's whereabouts. We keep * an array of these in shared memory, sized according to - * autovacuum_max_workers. + * autovacuum_worker_slots. 
* * wi_links entry into free list or running list * wi_dboid OID of the database this worker is supposed to work on @@ -289,7 +290,7 @@ typedef struct { sig_atomic_t av_signal[AutoVacNumSignals]; pid_t av_launcherpid; - dlist_head av_freeWorkers; + dclist_head av_freeWorkers; dlist_head av_runningWorkers; WorkerInfo av_startingWorker; AutoVacuumWorkItem av_workItems[NUM_WORKITEMS]; @@ -347,6 +348,7 @@ static void autovac_report_activity(autovac_table *tab); static void autovac_report_workitem(AutoVacuumWorkItem *workitem, const char *nspname, const char *relname); static void avl_sigusr2_handler(SIGNAL_ARGS); +static bool av_worker_available(void); @@ -575,8 +577,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len) * wakening conditions. */ - launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers), - false, &nap); + launcher_determine_sleep(av_worker_available(), false, &nap); /* * Wait until naptime expires or we get some type of signal (all the @@ -636,7 +637,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len) current_time = GetCurrentTimestamp(); LWLockAcquire(AutovacuumLock, LW_SHARED); - can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers); + can_launch = av_worker_available(); if (AutoVacuumShmem->av_startingWorker != NULL) { @@ -679,8 +680,8 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len) worker->wi_sharedrel = false; worker->wi_proc = NULL; worker->wi_launchtime = 0; - dlist_push_head(&AutoVacuumShmem->av_freeWorkers, - &worker->wi_links); + dclist_push_head(&AutoVacuumShmem->av_freeWorkers, + &worker->wi_links); AutoVacuumShmem->av_startingWorker = NULL; ereport(WARNING, errmsg("autovacuum worker took too long to start; canceled")); @@ -1087,7 +1088,7 @@ do_start_worker(void) /* return quickly when there are no free workers */ LWLockAcquire(AutovacuumLock, LW_SHARED); - if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers)) + if (!av_worker_available()) { 
LWLockRelease(AutovacuumLock); return InvalidOid; @@ -1240,7 +1241,7 @@ do_start_worker(void) * Get a worker entry from the freelist. We checked above, so there * really should be a free slot. */ - wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers); + wptr = dclist_pop_head_node(&AutoVacuumShmem->av_freeWorkers); worker = dlist_container(WorkerInfoData, wi_links, wptr); worker->wi_dboid = avdb->adw_datid; @@ -1609,8 +1610,8 @@ FreeWorkerInfo(int code, Datum arg) MyWorkerInfo->wi_proc = NULL; MyWorkerInfo->wi_launchtime = 0; pg_atomic_clear_flag(&MyWorkerInfo->wi_dobalance); - dlist_push_head(&AutoVacuumShmem->av_freeWorkers, - &MyWorkerInfo->wi_links); + dclist_push_head(&AutoVacuumShmem->av_freeWorkers, + &MyWorkerInfo->wi_links); /* not mine anymore */ MyWorkerInfo = NULL; @@ -3265,7 +3266,7 @@ AutoVacuumShmemSize(void) */ size = sizeof(AutoVacuumShmemStruct); size = MAXALIGN(size); - size = add_size(size, mul_size(autovacuum_max_workers, + size = add_size(size, mul_size(autovacuum_worker_slots, sizeof(WorkerInfoData))); return size; } @@ -3292,7 +3293,7 @@ AutoVacuumShmemInit(void) Assert(!found); AutoVacuumShmem->av_launcherpid = 0; - dlist_init(&AutoVacuumShmem->av_freeWorkers); + dclist_init(&AutoVacuumShmem->av_freeWorkers); dlist_init(&AutoVacuumShmem->av_runningWorkers); AutoVacuumShmem->av_startingWorker = NULL; memset(AutoVacuumShmem->av_workItems, 0, @@ -3302,10 +3303,10 @@ AutoVacuumShmemInit(void) MAXALIGN(sizeof(AutoVacuumShmemStruct))); /* initialize the WorkerInfo free list */ - for (i = 0; i < autovacuum_max_workers; i++) + for (i = 0; i < autovacuum_worker_slots; i++) { - dlist_push_head(&AutoVacuumShmem->av_freeWorkers, - &worker[i].wi_links); + dclist_push_head(&AutoVacuumShmem->av_freeWorkers, + &worker[i].wi_links); pg_atomic_init_flag(&worker[i].wi_dobalance); } @@ -3341,3 +3342,14 @@ check_autovacuum_work_mem(int *newval, void **extra, GucSource source) return true; } + +/* + * Returns whether there is a free autovacuum worker 
slot available. + */ +static bool +av_worker_available(void) +{ + int reserved = autovacuum_worker_slots - autovacuum_max_workers; + + return dclist_count(&AutoVacuumShmem->av_freeWorkers) > Max(0, reserved); +} diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6f974a8d21..e4c824fcb1 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -4144,7 +4144,7 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname) int MaxLivePostmasterChildren(void) { - return 2 * (MaxConnections + autovacuum_max_workers + 1 + + return 2 * (MaxConnections + autovacuum_worker_slots + 1 + max_wal_senders + max_worker_processes); } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 1b23efb26f..c20c9338ec 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -142,7 +142,7 @@ ProcGlobalSemas(void) * So, now we grab enough semaphores to support the desired max number * of backends immediately at initialization --- if the sysadmin has set * MaxConnections, max_worker_processes, max_wal_senders, or - * autovacuum_max_workers higher than his kernel will support, he'll + * autovacuum_worker_slots higher than his kernel will support, he'll * find out sooner rather than later. 
* * Another reason for creating semaphores here is that the semaphore @@ -242,13 +242,13 @@ InitProcGlobal(void) dlist_push_tail(&ProcGlobal->freeProcs, &proc->links); proc->procgloballist = &ProcGlobal->freeProcs; } - else if (i < MaxConnections + autovacuum_max_workers + 1) + else if (i < MaxConnections + autovacuum_worker_slots + 1) { /* PGPROC for AV launcher/worker, add to autovacFreeProcs list */ dlist_push_tail(&ProcGlobal->autovacFreeProcs, &proc->links); proc->procgloballist = &ProcGlobal->autovacFreeProcs; } - else if (i < MaxConnections + autovacuum_max_workers + 1 + max_worker_processes) + else if (i < MaxConnections + autovacuum_worker_slots + 1 + max_worker_processes) { /* PGPROC for bgworker, add to bgworkerFreeProcs list */ dlist_push_tail(&ProcGlobal->bgworkerFreeProcs, &proc->links); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 25867c8bd5..acbae29baf 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -577,15 +577,15 @@ InitializeMaxBackends(void) Assert(MaxBackends == 0); /* the extra unit accounts for the autovacuum launcher */ - MaxBackends = MaxConnections + autovacuum_max_workers + 1 + + MaxBackends = MaxConnections + autovacuum_worker_slots + 1 + max_worker_processes + max_wal_senders; if (MaxBackends > MAX_BACKENDS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("too many server processes configured"), - errdetail("\"max_connections\" (%d) plus \"autovacuum_max_workers\" (%d) plus \"max_worker_processes\" (%d) plus \"max_wal_senders\" (%d) must be less than %d.", - MaxConnections, autovacuum_max_workers, + errdetail("\"max_connections\" (%d) plus \"autovacuum_worker_slots\" (%d) plus \"max_worker_processes\" (%d) plus \"max_wal_senders\" (%d) must be less than %d.", + MaxConnections, autovacuum_worker_slots, max_worker_processes, max_wal_senders, MAX_BACKENDS))); } diff --git a/src/backend/utils/misc/guc_tables.c 
b/src/backend/utils/misc/guc_tables.c index 630ed0f162..6ffca198e9 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -3383,7 +3383,16 @@ struct config_int ConfigureNamesInt[] = }, { /* see max_connections */ - {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM, + {"autovacuum_worker_slots", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Sets the number of backend slots to allocate for autovacuum workers."), + NULL + }, + &autovacuum_worker_slots, + 16, 1, MAX_BACKENDS, + NULL, NULL, NULL + }, + { + {"autovacuum_max_workers", PGC_SIGHUP, AUTOVACUUM, gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."), NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 9ec9f97e92..52a4d44b59 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -658,8 +658,9 @@ #autovacuum = on # Enable autovacuum subprocess? 'on' # requires track_counts to also be on. 
-#autovacuum_max_workers = 3 # max number of autovacuum subprocesses +autovacuum_worker_slots = 16 # autovacuum worker slots to allocate # (change requires restart) +#autovacuum_max_workers = 3 # max number of autovacuum subprocesses #autovacuum_naptime = 1min # time between autovacuum runs #autovacuum_vacuum_threshold = 50 # min number of row updates before # vacuum diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index cae1e8b329..190baa699d 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -28,6 +28,7 @@ typedef enum /* GUC variables */ extern PGDLLIMPORT bool autovacuum_start_daemon; +extern PGDLLIMPORT int autovacuum_worker_slots; extern PGDLLIMPORT int autovacuum_max_workers; extern PGDLLIMPORT int autovacuum_work_mem; extern PGDLLIMPORT int autovacuum_naptime; diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 0135c5a795..98a5039709 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -608,6 +608,7 @@ sub init } print $conf "max_wal_senders = 10\n"; print $conf "max_replication_slots = 10\n"; + print $conf "autovacuum_worker_slots = 3\n"; print $conf "wal_log_hints = on\n"; print $conf "hot_standby = on\n"; # conservative settings to ensure we can run multiple postmasters: -- 2.39.3 (Apple Git-146)