Hi, 2018-09-14 12:23 GMT+02:00 Masahiko Sawada <sawada.m...@gmail.com>:
>> 2. If we know that this is neither a superuser nor a replication connection, we >> should check that at least (superuser_reserved_connections + >> NumWalSenders() - max_wal_senders) connection slots are available. > > Did you mean (superuser_reserved_connections + max_wal_senders - > NumWalSenders()) in the second point? Sure, my bad. I made a mistake when writing the email, but in the attached file it looks good. > > One arguable point of the second option could be that it breaks > backward compatibility of the parameter configurations. That is, the > existing systems need to re-configure max_connections. So it might > be better to take the first option with > replication_reserved_connections = 0 by default. Please find attached the new version of the patch, which introduces the replication_reserved_connections GUC. Regards, -- Alexander Kukushkin
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e1073ac6d3..80e6ef9f67 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3059,6 +3059,32 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-replication-reserved-connections" + xreflabel="replication_reserved_connections"> + <term><varname>replication_reserved_connections</varname> + (<type>integer</type>) + <indexterm> + <primary><varname>replication_reserved_connections</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Determines the number of connection <quote>slots</quote> that + are reserved for replication connections. Whenever the number + of active concurrent connections is at least + <varname>max_connections</varname> minus + <varname>replication_reserved_connections</varname> plus + <literal>number of active wal senders</literal>, new + non-superuser and non-replication connections will not be accepted. + </para> + + <para> + The default value is zero. The value should not exceed <varname>max_wal_senders</varname>. + This parameter can only be set at server start. 
+ </para> + </listitem> + </varlistentry> + <varlistentry id="guc-max-replication-slots" xreflabel="max_replication_slots"> <term><varname>max_replication_slots</varname> (<type>integer</type>) <indexterm> diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 305ff36258..a5a95ee92c 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -903,6 +903,10 @@ PostmasterMain(int argc, char *argv[]) if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL) ereport(ERROR, (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\""))); + if (replication_reserved_connections > max_wal_senders) + ereport(WARNING, + (errmsg("Value of replication_reserved_connections (%d) exceeds value of max_wal_senders (%d)", + replication_reserved_connections, max_wal_senders))); /* * Other one-time internal sanity checks can go here, if they are fast. diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 370429d746..e64d5ed44d 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -122,6 +122,8 @@ int max_wal_senders = 0; /* the maximum number of concurrent int wal_sender_timeout = 60 * 1000; /* maximum time to send one WAL * data message */ bool log_replication_commands = false; +int replication_reserved_connections = 0; /* the number of connection slots + * reserved for replication connections */ /* * State for WalSndWakeupRequest @@ -2284,6 +2286,10 @@ InitWalSenderSlot(void) walsnd->applyLag = -1; walsnd->state = WALSNDSTATE_STARTUP; walsnd->latch = &MyProc->procLatch; + + /* increment the number of allocated wal sender slots */ + pg_atomic_fetch_add_u32(&WalSndCtl->num_wal_senders, 1); + SpinLockRelease(&walsnd->mutex); /* don't need the lock anymore */ MyWalSnd = (WalSnd *) walsnd; @@ -2317,6 +2323,10 @@ WalSndKill(int code, Datum arg) walsnd->latch = NULL; /* Mark WalSnd struct as no longer being in 
use. */ walsnd->pid = 0; + + /* decrement the number of allocated wal sender slots */ + pg_atomic_fetch_sub_u32(&WalSndCtl->num_wal_senders, 1); + SpinLockRelease(&walsnd->mutex); } @@ -3033,6 +3043,7 @@ WalSndShmemInit(void) { /* First time through, so initialize */ MemSet(WalSndCtl, 0, WalSndShmemSize()); + pg_atomic_init_u32(&WalSndCtl->num_wal_senders, 0); for (i = 0; i < NUM_SYNC_REP_WAIT_MODE; i++) SHMQueueInit(&(WalSndCtl->SyncRepQueue[i])); @@ -3587,3 +3598,9 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) Assert(time != 0); return now - time; } + +/* Return the amount of allocated wal_sender slots */ +uint32 NumWalSenders(void) +{ + return pg_atomic_read_u32(&WalSndCtl->num_wal_senders); +} diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 5ef6315d20..436574e85d 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -789,17 +789,28 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, } /* - * The last few connection slots are reserved for superusers. Although - * replication connections currently require superuser privileges, we - * don't allow them to consume the reserved slots, which are intended for - * interactive use. + * The last few connection slots are reserved for superusers and + * replication. Superusers always have a priority over replication + * connections. 
*/ - if ((!am_superuser || am_walsender) && - ReservedBackends > 0 && - !HaveNFreeProcs(ReservedBackends)) - ereport(FATAL, - (errcode(ERRCODE_TOO_MANY_CONNECTIONS), - errmsg("remaining connection slots are reserved for non-replication superuser connections"))); + if (am_walsender) + { + if (ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) + ereport(FATAL, + (errcode(ERRCODE_TOO_MANY_CONNECTIONS), + errmsg("remaining connection slots are reserved for non-replication superuser connections"))); + } + else if (!am_superuser) + { + uint32 n = Min(max_wal_senders, replication_reserved_connections); + + n = ReservedBackends + n - NumWalSenders(); + + if (n > 0 && !HaveNFreeProcs(n)) + ereport(FATAL, + (errcode(ERRCODE_TOO_MANY_CONNECTIONS), + errmsg("remaining connection slots are reserved for replication or superuser connections"))); + } /* Check replication permissions needed for walsender processes. */ if (am_walsender) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 77662aff7f..5203b70ef6 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2527,6 +2527,17 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + /* see max_connections, max_wal_senders and superuser_reserved_connections */ + {"replication_reserved_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS, + gettext_noop("Sets the number of connection slots reserved for replication connections."), + NULL + }, + &replication_reserved_connections, + 0, 0, MAX_BACKENDS, + NULL, NULL, NULL + }, + { /* see max_wal_senders */ {"max_replication_slots", PGC_POSTMASTER, REPLICATION_SENDING, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4e61bc6521..6713833c5b 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -237,6 +237,8 @@ #max_wal_senders = 10 # max number of walsender processes # (change requires 
restart) +#replication_reserved_connections = 0 # number of connection slots reserved + # for replication connections. (change requires restart) #wal_keep_segments = 0 # in logfile segments; 0 disables #wal_sender_timeout = 60s # in milliseconds; 0 disables diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index 45b72a76db..d91f9348ba 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -36,7 +36,9 @@ extern bool wake_wal_senders; extern int max_wal_senders; extern int wal_sender_timeout; extern bool log_replication_commands; +extern int replication_reserved_connections; +extern uint32 NumWalSenders(void); extern void InitWalSender(void); extern bool exec_replication_command(const char *query_string); extern void WalSndErrorCleanup(void); diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index 4b90477936..3ef8f4d7b8 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -14,6 +14,7 @@ #include "access/xlog.h" #include "nodes/nodes.h" +#include "port/atomics.h" #include "replication/syncrep.h" #include "storage/latch.h" #include "storage/shmem.h" @@ -101,6 +102,8 @@ typedef struct */ bool sync_standbys_defined; + pg_atomic_uint32 num_wal_senders; + WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER]; } WalSndCtlData;