Hello hackers,

Couple of years ago Victor Wagner presented a patch [1] that introduced multiple hosts capability and also hostorder and failover_timeout parameters for libpq. Subsequently multi-host feature was reimplemented by Robert Haas and committed. Later target_session_attrs parameter was also added. In this thread I want to revisit hostorder and failover_timeout proposal.

'hostorder' defines the order in which postgres instances listed in connection string will be tried. Possible values are:
* sequential (default)
* random

Random order can be used, for instance, for maintaining load balancing (which is particularly useful in multi-master cluster, but also can be used to load-balance read-only connections to standbys).

'failover_timeout' specifies time span (in seconds) during which libpq would continue attempts to connect to the hosts listed in connection string. If failover_timeout is specified then libpq will loop over hosts again and again until either it successfully connects to one of the hosts or it runs out of time.

I reimplemented 'hostorder' and 'failover_timeout' parameters in the attached patch. I also took some documentation pieces from Victor Wagner's original patch. I'll be glad to see any comments and suggestions. Thanks!

[1] https://www.postgresql.org/message-id/flat/20150818041850.GA5092%40wagner.pp.ru

--
Ildar Musin
i.mu...@postgrespro.ru
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 800e68a..a9ba534 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -845,7 +845,7 @@ postgresql://localhost/mydb
 postgresql://user@localhost
 postgresql://user:secret@localhost
 postgresql://other@localhost/otherdb?connect_timeout=10&application_name=myapp
-postgresql://host1:123,host2:456/somedb?target_session_attrs=any&application_name=myapp
+postgresql://host1:123,host2:456/somedb?hostorder=random&target_session_attrs=any&application_name=myapp
 </programlisting>
     Components of the hierarchical part of the <acronym>URI</acronym> can also
     be given as parameters.  For example:
@@ -910,14 +910,15 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
      <title>Specifying Multiple Hosts</title>
 
      <para>
-       It is possible to specify multiple hosts to connect to, so that they are
-       tried in the given order. In the Keyword/Value format, the <literal>host</literal>,
-       <literal>hostaddr</literal>, and <literal>port</literal> options accept a comma-separated
-       list of values. The same number of elements must be given in each option, such
-       that e.g. the first <literal>hostaddr</literal> corresponds to the first host name,
-       the second <literal>hostaddr</literal> corresponds to the second host name, and so
-       forth. As an exception, if only one <literal>port</literal> is specified, it
-       applies to all the hosts.
+       It is possible to specify multiple hosts to connect to. In the Keyword/Value
+       format, the <literal>host</literal>, <literal>hostaddr</literal>, and
+       <literal>port</literal> options accept a comma-separated list of values.
+       The same number of elements must be given in each option, such that e.g. the
+       first <literal>hostaddr</literal> corresponds to the first host name, the second
+       <literal>hostaddr</literal> corresponds to the second host name, and so
+       forth. As an exception, if only one <literal>port</literal> is specified,
+       it applies to all the hosts. The order in which hosts are tried may be
+       specified by <xref linkend="libpq-connect-hostorder"/> parameter.
      </para>
 
      <para>
@@ -968,8 +969,7 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         Unix-domain sockets, the default is to connect to <literal>localhost</literal>.
        </para>
        <para>
-        A comma-separated list of host names is also accepted, in which case
-        each host name in the list is tried in order. See
+        A comma-separated list of host names is also accepted. See
         <xref linkend="libpq-multiple-hosts"/> for details.
        </para>
       </listitem>
@@ -1039,6 +1039,30 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
        </listitem>
       </varlistentry>
 
+      <varlistentry id="libpq-connect-hostorder" xreflabel="hostorder">
+       <term><literal>hostorder</literal></term>
+       <listitem>
+       <para>
+        Specifies the order in which hosts from the list of hosts
+        specified by the <xref linkend="libpq-connect-host"/> parameter are
+        tried.
+       </para>
+       <para>
+        If the value of this argument is <literal>sequential</literal> (the
+        default) connections to the hosts will be attempted in the order in
+        which they are given.
+       </para>
+       <para>
+        If the value is <literal>random</literal>, the host to connect to
+        will be randomly picked from the list. It allows load balacing between
+        several cluster nodes. However, PostgreSQL doesn't currently support
+        multimaster clusters. So, without the use of third-party products,
+        only read-only connections can take advantage from load-balancing.
+        See <xref linkend="libpq-connect-target-session-attrs"/>
+       </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry id="libpq-connect-port" xreflabel="port">
        <term><literal>port</literal></term>
        <listitem>
@@ -1115,6 +1139,29 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       </listitem>
      </varlistentry>
 
+     <varlistentry id="libpq-connect-falover-timeout" xreflabel="failover_timeout">
+      <term><literal>failover_timeout</literal></term>
+      <listitem>
+      <para>
+       Maximum time to cyclically retry all the hosts in the connection string.
+       (as decimal integer number of seconds). If not specified, then
+       hosts are tried just once.
+      </para>
+      <para>
+       If we have replicating cluster, and master node fails, it might
+       take some time to promote one of the standby nodes to the new master.
+       So clients which detect failure to connect to the master might
+       abandon attempts to reestablish a connection before the new master
+       becomes available.
+      </para>
+      <para>
+       Setting this parameter to a value that takes into account the amount of
+       time needed for failover to complete will ensure attempts to connect
+       to hosts continue to be made until the new master becomes available.
+      </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="libpq-connect-client-encoding" xreflabel="client_encoding">
       <term><literal>client_encoding</literal></term>
       <listitem>
diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile
index abe0a50..18b6a78 100644
--- a/src/interfaces/libpq/Makefile
+++ b/src/interfaces/libpq/Makefile
@@ -35,8 +35,8 @@ OBJS=	fe-auth.o fe-auth-scram.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-l
 	fe-protocol2.o fe-protocol3.o pqexpbuffer.o fe-secure.o \
 	libpq-events.o
 # libpgport C files we always use
-OBJS += chklocale.o inet_net_ntop.o noblock.o pgstrcasecmp.o pqsignal.o \
-	thread.o
+OBJS += chklocale.o inet_net_ntop.o noblock.o pgsleep.o pgstrcasecmp.o \
+	pqsignal.o thread.o
 # libpgport C files that are needed if identified by configure
 OBJS += $(filter crypt.o getaddrinfo.o getpeereid.o inet_aton.o open.o system.o snprintf.o strerror.o strlcpy.o strnlen.o win32error.o win32setlocale.o, $(LIBOBJS))
 
@@ -78,7 +78,7 @@ endif
 # shared library link.  (The order in which you list them here doesn't
 # matter.)
 ifneq ($(PORTNAME), win32)
-SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS)
+SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi_krb5 -lgss -lgssapi -lssl -lsocket -lnsl -lresolv -lintl -lrt, $(LIBS)) $(LDAP_LIBS_FE) $(PTHREAD_LIBS)
 else
 SHLIB_LINK += $(filter -lcrypt -ldes -lcom_err -lcrypto -lk5crypto -lkrb5 -lgssapi32 -lssl -lsocket -lnsl -lresolv -lintl $(PTHREAD_LIBS), $(LIBS)) $(LDAP_LIBS_FE)
 endif
diff --git a/src/interfaces/libpq/fe-auth.c b/src/interfaces/libpq/fe-auth.c
index 3b2073a..f908a32 100644
--- a/src/interfaces/libpq/fe-auth.c
+++ b/src/interfaces/libpq/fe-auth.c
@@ -44,6 +44,9 @@
 #include "fe-auth.h"
 
 
+#define CURRENT_HOST(conn) \
+	((conn)->connhost[(conn)->connaddr[(conn)->whichaddr].hostidx])
+
 #ifdef ENABLE_GSS
 /*
  * GSSAPI authentication system.
@@ -558,7 +561,7 @@ pg_SASL_init(PGconn *conn, int payloadlen)
 	 * password, so we can just go ahead here without further distinction.
 	 */
 	conn->password_needed = true;
-	password = conn->connhost[conn->whichhost].password;
+	password = CURRENT_HOST(conn).password;
 	if (password == NULL)
 		password = conn->pgpass;
 	if (password == NULL || password[0] == '\0')
@@ -967,7 +970,7 @@ pg_fe_sendauth(AuthRequest areq, int payloadlen, PGconn *conn)
 				char	   *password;
 
 				conn->password_needed = true;
-				password = conn->connhost[conn->whichhost].password;
+				password = CURRENT_HOST(conn).password;
 				if (password == NULL)
 					password = conn->pgpass;
 				if (password == NULL || password[0] == '\0')
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index f3057e9..07ea7e5 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -75,6 +75,8 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
 #include "mb/pg_wchar.h"
 #include "port/pg_bswap.h"
 
+#include "portability/instr_time.h"
+
 
 #ifndef WIN32
 #define PGPASSFILE ".pgpass"
@@ -123,6 +125,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
 #define DefaultOption	""
 #define DefaultAuthtype		  ""
 #define DefaultTargetSessionAttrs	"any"
+#define DefaultHostorder	"sequential"
 #define DefaultSCRAMChannelBinding	SCRAM_CHANNEL_BINDING_TLS_UNIQUE
 #ifdef USE_SSL
 #define DefaultSSLMode "prefer"
@@ -130,6 +133,11 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
 #define DefaultSSLMode	"disable"
 #endif
 
+#define CURRENT_HOST(conn) \
+	((conn)->connhost[(conn)->connaddr[(conn)->whichaddr].hostidx])
+#define SHOULD_TRY_NEXT_ADDR(conn) \
+	((conn)->whichaddr + 1 < (conn)->nconnaddr || (conn)->failover_timeout)
+
 /* ----------
  * Definition of the conninfo parameters and their fallback resources.
  *
@@ -330,6 +338,14 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
+	{"hostorder", NULL, DefaultHostorder, NULL,
+		"Hostorder", "", 10, /* sizeof("sequential") = 10 */
+	offsetof(struct pg_conn, hostorder)},
+
+	{"failover_timeout", NULL, NULL, NULL,
+		"Failover Timeout", "", 10,
+	offsetof(struct pg_conn, failover_timeout)},
+
 	/* Terminating entry --- MUST BE LAST */
 	{NULL, NULL, NULL, NULL,
 	NULL, NULL, 0}
@@ -410,6 +426,7 @@ static char *passwordFromFile(const char *hostname, const char *port, const char
 				 const char *username, const char *pgpassfile);
 static void pgpassfileWarning(PGconn *conn);
 static void default_threadlock(int acquire);
+static void displayed_host_port(PGconn *conn, char **displayed_host, char **displayed_port);
 
 
 /* global variable because fe-auth.c needs to access it */
@@ -906,7 +923,7 @@ connectOptions2(PGconn *conn)
 	 * try to connect.  For that, count the number of elements in the hostaddr
 	 * or host options.  If neither is given, assume one host.
 	 */
-	conn->whichhost = 0;
+	conn->whichaddr = 0;
 	if (conn->pghostaddr && conn->pghostaddr[0] != '\0')
 		conn->nconnhost = count_comma_separated_elems(conn->pghostaddr);
 	else if (conn->pghost && conn->pghost[0] != '\0')
@@ -935,6 +952,7 @@ connectOptions2(PGconn *conn)
 				goto oom_error;
 
 			conn->connhost[i].type = CHT_HOST_ADDRESS;
+			conn->connhost[i].readonly = false;
 		}
 
 		/*
@@ -967,6 +985,7 @@ connectOptions2(PGconn *conn)
 					conn->connhost[i].type = CHT_UNIX_SOCKET;
 #endif
 			}
+			conn->connhost[i].readonly = false;
 		}
 		if (more || i != conn->nconnhost)
 		{
@@ -992,6 +1011,7 @@ connectOptions2(PGconn *conn)
 		conn->connhost[0].host = strdup(DefaultHost);
 		conn->connhost[0].type = CHT_HOST_NAME;
 #endif
+		conn->connhost[0].readonly = false;
 		if (conn->connhost[0].host == NULL)
 			goto oom_error;
 	}
@@ -1195,6 +1215,22 @@ connectOptions2(PGconn *conn)
 	}
 
 	/*
+	 * Validate hostorder option.
+	 */
+	if (conn->hostorder)
+	{
+		if (strcmp(conn->hostorder, "sequential") != 0
+			&& strcmp(conn->hostorder, "random") != 0)
+		{
+			conn->status = CONNECTION_BAD;
+			printfPQExpBuffer(&conn->errorMessage,
+							  libpq_gettext("invalid hostorder value: \"%s\"\n"),
+							  conn->hostorder);
+			return false;
+		}
+	}
+
+	/*
 	 * Only if we get this far is it appropriate to try to connect. (We need a
 	 * state flag, rather than just the boolean result of this function, in
 	 * case someone tries to PQreset() the PGconn.)
@@ -1445,17 +1481,17 @@ connectFailureMessage(PGconn *conn, int errorno)
 	else
 #endif							/* HAVE_UNIX_SOCKETS */
 	{
-		char		host_addr[NI_MAXHOST];
-		const char *displayed_host;
-		const char *displayed_port;
+		char	host_addr[NI_MAXHOST];
+		char   *displayed_host;
+		char   *displayed_port;
 		struct sockaddr_storage *addr = &conn->raddr.addr;
 
 		/*
 		 * Optionally display the network address with the hostname. This is
 		 * useful to distinguish between IPv4 and IPv6 connections.
 		 */
-		if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-			strlcpy(host_addr, conn->connhost[conn->whichhost].hostaddr, NI_MAXHOST);
+		if (CURRENT_HOST(conn).type == CHT_HOST_ADDRESS)
+			strlcpy(host_addr, CURRENT_HOST(conn).hostaddr, NI_MAXHOST);
 		else if (addr->ss_family == AF_INET)
 		{
 			if (inet_net_ntop(AF_INET,
@@ -1478,20 +1514,14 @@ connectFailureMessage(PGconn *conn, int errorno)
 			strcpy(host_addr, "???");
 
 		/* To which host and port were we actually connecting? */
-		if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-			displayed_host = conn->connhost[conn->whichhost].hostaddr;
-		else
-			displayed_host = conn->connhost[conn->whichhost].host;
-		displayed_port = conn->connhost[conn->whichhost].port;
-		if (displayed_port == NULL || displayed_port[0] == '\0')
-			displayed_port = DEF_PGPORT_STR;
+		displayed_host_port(conn, &displayed_host, &displayed_port);
 
 		/*
 		 * If the user did not supply an IP address using 'hostaddr', and
 		 * 'host' was missing or does not match our lookup, display the
 		 * looked-up IP address.
 		 */
-		if (conn->connhost[conn->whichhost].type != CHT_HOST_ADDRESS &&
+		if (CURRENT_HOST(conn).type != CHT_HOST_ADDRESS &&
 			strcmp(displayed_host, host_addr) != 0)
 			appendPQExpBuffer(&conn->errorMessage,
 							  libpq_gettext("could not connect to server: %s\n"
@@ -1691,6 +1721,9 @@ connectDBStart(PGconn *conn)
 	int			ret;
 	int			i;
 
+	pg_conn_address addrs[1024];	/* TODO */
+	int			naddrs = 0;
+
 	if (!conn)
 		return 0;
 
@@ -1794,6 +1827,60 @@ connectDBStart(PGconn *conn)
 			conn->options_valid = false;
 			goto connect_errReturn;
 		}
+
+		if (ch->addrlist)
+		{
+			struct addrinfo *addr;
+
+			for (addr = ch->addrlist; addr != NULL; addr = addr->ai_next)
+			{
+				addrs[naddrs].info = addr;
+				addrs[naddrs++].hostidx = i;
+			}
+		}
+	}
+
+	if (naddrs)
+	{
+		Size connaddr_sz = sizeof(pg_conn_address) * naddrs;
+
+		conn->connaddr = malloc(connaddr_sz);
+		if (!conn->connaddr)
+		{
+			printfPQExpBuffer(&conn->errorMessage,
+							  libpq_gettext("out of memory\n"));
+			goto connect_errReturn;
+		}
+		memcpy((char *) conn->connaddr, (char *) addrs, connaddr_sz);
+		conn->nconnaddr = naddrs;
+
+		/*
+		 * If random hostorder is specified then permutate original
+		 * addresses array
+		 */
+		if (conn->hostorder != NULL
+			&& strcmp(conn->hostorder, "random") == 0)
+		{
+			int			i;
+			instr_time	t;
+
+			INSTR_TIME_SET_CURRENT(t);
+			srandom((unsigned int) INSTR_TIME_GET_MICROSEC(t));
+
+			for (i = 0; i < naddrs; i++)
+			{
+				long r1 = random() % naddrs,
+					 r2 = random() % naddrs;
+				pg_conn_address tmp;
+
+				if (r1 == r2)
+					continue;
+
+				tmp = conn->connaddr[r1];
+				conn->connaddr[r1] = conn->connaddr[r2];
+				conn->connaddr[r2] = tmp;
+			}
+		}
 	}
 
 #ifdef USE_SSL
@@ -1807,11 +1894,15 @@ connectDBStart(PGconn *conn)
 	/*
 	 * Set up to try to connect, with protocol 3.0 as the first attempt.
 	 */
-	conn->whichhost = 0;
-	conn->addr_cur = conn->connhost[0].addrlist;
+	conn->whichaddr = 0;
 	conn->pversion = PG_PROTOCOL(3, 0);
 	conn->send_appname = true;
 	conn->status = CONNECTION_NEEDED;
+	if (conn->failover_timeout)
+		conn->failover_finish_time = time(NULL) + atoi(conn->failover_timeout);
+	else
+		conn->failover_finish_time = (time_t) 0;		/* it is in past, so its
+														 * ok */
 
 	/*
 	 * The code for processing CONNECTION_NEEDED state is in PQconnectPoll(),
@@ -1914,9 +2005,9 @@ connectDBComplete(PGconn *conn)
 			 * If there are no more hosts, return (the error message is
 			 * already set)
 			 */
-			if (++conn->whichhost >= conn->nconnhost)
+			if (conn->whichaddr + 1 >= conn->nconnaddr)
 			{
-				conn->whichhost = 0;
+				conn->whichaddr = 0;
 				conn->status = CONNECTION_BAD;
 				return 0;
 			}
@@ -1926,7 +2017,6 @@ connectDBComplete(PGconn *conn)
 			 * connect_timeout timer
 			 */
 			pqDropConnection(conn, true);
-			conn->addr_cur = conn->connhost[conn->whichhost].addrlist;
 			conn->status = CONNECTION_NEEDED;
 			if (conn->connect_timeout != NULL)
 				finish_time = time(NULL) + timeout;
@@ -2071,30 +2161,56 @@ keep_going:						/* We will come back to here until there is
 			{
 				/*
 				 * Try to initiate a connection to one of the addresses
-				 * returned by pg_getaddrinfo_all().  conn->addr_cur is the
-				 * next one to try. We fail when we run out of addresses.
+				 * returned by pg_getaddrinfo_all(). conn->whichaddr is the
+				 * index in conn->connaddrs which points to the next address
+				 * to try. We fail when we run out of addresses.
 				 */
 				for (;;)
 				{
 					struct addrinfo *addr_cur;
 
-					/*
-					 * Advance to next possible host, if we've tried all of
-					 * the addresses for the current host.
-					 */
-					if (conn->addr_cur == NULL)
+					/* No more addresses */
+					if (conn->whichaddr >= conn->nconnaddr)
 					{
-						if (++conn->whichhost >= conn->nconnhost)
+						if (time(NULL) < conn->failover_finish_time)
 						{
-							conn->whichhost = 0;
-							break;
+							/*
+							 * If failover timeout is set, retry list of hosts
+							 * from the beginning
+							 */
+							pg_usleep(1000000);
+							conn->whichaddr = 0;
+
+							/*
+							 * Reset readonly flag for all hosts as one of
+							 * them might have been promoted
+							 */
+							if (strcmp(conn->target_session_attrs, "read-write") == 0)
+							{
+								int i;
+
+								for (i = 0; i < conn->nconnhost; i++)
+									conn->connhost[i].readonly = false;
+							}
 						}
-						conn->addr_cur =
-							conn->connhost[conn->whichhost].addrlist;
+						else
+							break;	/* Stop trying */
+					}
+
+					/*
+					 * If read-write connection is required check whether
+					 * host was already marked as read-only
+					 */
+					if (conn->target_session_attrs != NULL
+						&& strcmp(conn->target_session_attrs, "read-write") == 0
+						&& CURRENT_HOST(conn).readonly)
+					{
+						conn->whichaddr++;
+						continue;
 					}
 
 					/* Remember current address for possible error msg */
-					addr_cur = conn->addr_cur;
+					addr_cur = conn->connaddr[conn->whichaddr].info;
 					memcpy(&conn->raddr.addr, addr_cur->ai_addr,
 						   addr_cur->ai_addrlen);
 					conn->raddr.salen = addr_cur->ai_addrlen;
@@ -2106,10 +2222,9 @@ keep_going:						/* We will come back to here until there is
 						 * ignore socket() failure if we have more addresses
 						 * to try
 						 */
-						if (addr_cur->ai_next != NULL ||
-							conn->whichhost + 1 < conn->nconnhost)
+						if (SHOULD_TRY_NEXT_ADDR(conn))
 						{
-							conn->addr_cur = addr_cur->ai_next;
+							conn->whichaddr++;
 							continue;
 						}
 						appendPQExpBuffer(&conn->errorMessage,
@@ -2128,7 +2243,7 @@ keep_going:						/* We will come back to here until there is
 						if (!connectNoDelay(conn))
 						{
 							pqDropConnection(conn, true);
-							conn->addr_cur = addr_cur->ai_next;
+							conn->whichaddr++;
 							continue;
 						}
 					}
@@ -2138,7 +2253,7 @@ keep_going:						/* We will come back to here until there is
 										  libpq_gettext("could not set socket to nonblocking mode: %s\n"),
 										  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
 						pqDropConnection(conn, true);
-						conn->addr_cur = addr_cur->ai_next;
+						conn->whichaddr++;
 						continue;
 					}
 
@@ -2149,7 +2264,7 @@ keep_going:						/* We will come back to here until there is
 										  libpq_gettext("could not set socket to close-on-exec mode: %s\n"),
 										  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
 						pqDropConnection(conn, true);
-						conn->addr_cur = addr_cur->ai_next;
+						conn->whichaddr++;
 						continue;
 					}
 #endif							/* F_SETFD */
@@ -2197,7 +2312,7 @@ keep_going:						/* We will come back to here until there is
 						if (err)
 						{
 							pqDropConnection(conn, true);
-							conn->addr_cur = addr_cur->ai_next;
+							conn->whichaddr++;
 							continue;
 						}
 					}
@@ -2286,9 +2401,9 @@ keep_going:						/* We will come back to here until there is
 					pqDropConnection(conn, true);
 
 					/*
-					 * Try the next address, if any.
+					 * Advance to next address
 					 */
-					conn->addr_cur = addr_cur->ai_next;
+					conn->whichaddr++;
 				}				/* loop over addresses */
 
 				/*
@@ -2334,10 +2449,9 @@ keep_going:						/* We will come back to here until there is
 					 * If more addresses remain, keep trying, just as in the
 					 * case where connect() returned failure immediately.
 					 */
-					if (conn->addr_cur->ai_next != NULL ||
-						conn->whichhost + 1 < conn->nconnhost)
+					if (SHOULD_TRY_NEXT_ADDR(conn))
 					{
-						conn->addr_cur = conn->addr_cur->ai_next;
+						conn->whichaddr++;
 						conn->status = CONNECTION_NEEDED;
 						goto keep_going;
 					}
@@ -3099,8 +3213,8 @@ keep_going:						/* We will come back to here until there is
 			}
 		case CONNECTION_CHECK_WRITABLE:
 			{
-				const char *displayed_host;
-				const char *displayed_port;
+				char *displayed_host;
+				char *displayed_port;
 
 				if (!saveErrorMessage(conn, &savedMessage))
 					goto error_return;
@@ -3128,16 +3242,11 @@ keep_going:						/* We will come back to here until there is
 					val = PQgetvalue(res, 0, 0);
 					if (strncmp(val, "on", 2) == 0)
 					{
-						const char *displayed_host;
-						const char *displayed_port;
+						char *displayed_host;
+						char *displayed_port;
 
-						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-							displayed_host = conn->connhost[conn->whichhost].hostaddr;
-						else
-							displayed_host = conn->connhost[conn->whichhost].host;
-						displayed_port = conn->connhost[conn->whichhost].port;
-						if (displayed_port == NULL || displayed_port[0] == '\0')
-							displayed_port = DEF_PGPORT_STR;
+						CURRENT_HOST(conn).readonly = true;
+						displayed_host_port(conn, &displayed_host, &displayed_port);
 
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
@@ -3152,10 +3261,9 @@ keep_going:						/* We will come back to here until there is
 						sendTerminateConn(conn);
 						pqDropConnection(conn, true);
 
-						/* Skip any remaining addresses for this host. */
-						conn->addr_cur = NULL;
-						if (conn->whichhost + 1 < conn->nconnhost)
+						if (SHOULD_TRY_NEXT_ADDR(conn))
 						{
+							conn->whichaddr++;
 							conn->status = CONNECTION_NEEDED;
 							goto keep_going;
 						}
@@ -3185,13 +3293,7 @@ keep_going:						/* We will come back to here until there is
 					PQclear(res);
 				restoreErrorMessage(conn, &savedMessage);
 
-				if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-					displayed_host = conn->connhost[conn->whichhost].hostaddr;
-				else
-					displayed_host = conn->connhost[conn->whichhost].host;
-				displayed_port = conn->connhost[conn->whichhost].port;
-				if (displayed_port == NULL || displayed_port[0] == '\0')
-					displayed_port = DEF_PGPORT_STR;
+				displayed_host_port(conn, &displayed_host, &displayed_port);
 				appendPQExpBuffer(&conn->errorMessage,
 								  libpq_gettext("test \"SHOW transaction_read_only\" failed "
 												"on server \"%s:%s\"\n"),
@@ -3200,10 +3302,9 @@ keep_going:						/* We will come back to here until there is
 				sendTerminateConn(conn);
 				pqDropConnection(conn, true);
 
-				if (conn->addr_cur->ai_next != NULL ||
-					conn->whichhost + 1 < conn->nconnhost)
+				if (SHOULD_TRY_NEXT_ADDR(conn))
 				{
-					conn->addr_cur = conn->addr_cur->ai_next;
+					conn->whichaddr++;
 					conn->status = CONNECTION_NEEDED;
 					goto keep_going;
 				}
@@ -3438,6 +3539,8 @@ freePGconn(PGconn *conn)
 		free(conn->connhost);
 	}
 
+	if (conn->connaddr)
+		free(conn->connaddr);
 	if (conn->client_encoding_initial)
 		free(conn->client_encoding_initial);
 	if (conn->events)
@@ -3547,7 +3650,6 @@ release_all_addrinfo(PGconn *conn)
 			conn->connhost[i].addrlist = NULL;
 		}
 	}
-	conn->addr_cur = NULL;
 }
 
 /*
@@ -6002,8 +6104,8 @@ PQpass(const PGconn *conn)
 
 	if (!conn)
 		return NULL;
-	if (conn->connhost != NULL)
-		password = conn->connhost[conn->whichhost].password;
+	if (conn->connhost != NULL && conn->connaddr != NULL)
+		password = CURRENT_HOST(conn).password;
 	if (password == NULL)
 		password = conn->pgpass;
 	/* Historically we've returned "" not NULL for no password specified */
@@ -6020,12 +6122,12 @@ PQhost(const PGconn *conn)
 
 	if (conn->connhost != NULL)
 	{
-		if (conn->connhost[conn->whichhost].host != NULL &&
-			conn->connhost[conn->whichhost].host[0] != '\0')
-			return conn->connhost[conn->whichhost].host;
-		else if (conn->connhost[conn->whichhost].hostaddr != NULL &&
-				 conn->connhost[conn->whichhost].hostaddr[0] != '\0')
-			return conn->connhost[conn->whichhost].hostaddr;
+		if (CURRENT_HOST(conn).host != NULL &&
+			CURRENT_HOST(conn).host[0] != '\0')
+			return CURRENT_HOST(conn).host;
+		else if (CURRENT_HOST(conn).hostaddr != NULL &&
+				 CURRENT_HOST(conn).hostaddr[0] != '\0')
+			return CURRENT_HOST(conn).hostaddr;
 	}
 
 	return "";
@@ -6038,7 +6140,7 @@ PQport(const PGconn *conn)
 		return NULL;
 
 	if (conn->connhost != NULL)
-		return conn->connhost[conn->whichhost].port;
+		return CURRENT_HOST(conn).port;
 
 	return "";
 }
@@ -6615,3 +6717,17 @@ PQregisterThreadLock(pgthreadlock_t newhandler)
 
 	return prev;
 }
+
+static void
+displayed_host_port(PGconn *conn, char **displayed_host, char **displayed_port)
+{
+	pg_conn_host *h = &CURRENT_HOST(conn);
+
+	if (h->type == CHT_HOST_ADDRESS)
+		*displayed_host = h->hostaddr;
+	else
+		*displayed_host = h->host;
+	*displayed_port = h->port;
+	if (*displayed_port == NULL || *displayed_port[0] == '\0')
+		*displayed_port = DEF_PGPORT_STR;
+}
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index eba23dc..4e46766 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -313,8 +313,15 @@ typedef struct pg_conn_host
 								 * password file.  only set if the PGconn's
 								 * pgpass field is NULL. */
 	struct addrinfo *addrlist;	/* list of possible backend addresses */
+	bool		readonly;		/* true if host is proved to be read-only */
 } pg_conn_host;
 
+typedef struct pg_conn_address
+{
+	struct addrinfo *info;
+	int			hostidx;		/* host index in connhost array */
+} pg_conn_address;
+
 /*
  * PGconn stores all the state data associated with a single connection
  * to a backend.
@@ -365,6 +372,12 @@ struct pg_conn
 	/* Type of connection to make.  Possible values: any, read-write. */
 	char	   *target_session_attrs;
 
+	/* Connection order. Possible values: sequential, random */
+	char	   *hostorder;
+
+	char	   *failover_timeout;
+	time_t		failover_finish_time;
+
 	/* Optional file to write trace info to */
 	FILE	   *Pfdebug;
 
@@ -394,8 +407,10 @@ struct pg_conn
 
 	/* Support for multiple hosts in connection string */
 	int			nconnhost;		/* # of possible hosts */
-	int			whichhost;		/* host we're currently considering */
 	pg_conn_host *connhost;		/* details about each possible host */
+	int			nconnaddr;		/* # of possible addresses */
+	int			whichaddr;		/* address we're currently considering */
+	pg_conn_address *connaddr;	/* plain array of all addresses */
 
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
@@ -411,7 +426,6 @@ struct pg_conn
 	bool		sigpipe_flag;	/* can we mask SIGPIPE via MSG_NOSIGNAL? */
 
 	/* Transient state needed while establishing connection */
-	struct addrinfo *addr_cur;	/* backend address currently being tried */
 	PGSetenvStatusType setenv_state;	/* for 2.0 protocol only */
 	const PQEnvironmentOption *next_eo;
 	bool		send_appname;	/* okay to send application_name? */

Reply via email to