From d0b94f2cb8c37e3a7f21c76ff362ca382347aced Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.haribabu@gmail.com>
Date: Thu, 7 Jun 2018 18:11:51 +1000
Subject: [PATCH] Allow taget-session-attrs to accept prefer-read option

With this new prefer-read option, the connection is preferred
to connect to a read-only server if available in the connection
string, otherwise connect to a read-write server.
---
 doc/src/sgml/libpq.sgml               |  13 ++-
 src/interfaces/libpq/fe-connect.c     | 209 ++++++++++++++++++++++++++--------
 src/interfaces/libpq/libpq-int.h      |   3 +-
 src/test/recovery/t/001_stream_rep.pl |  14 ++-
 4 files changed, 190 insertions(+), 49 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index d67212b831..c89b4267f5 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1592,8 +1592,17 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         successful connection; if it returns <literal>on</literal>, the connection
         will be closed.  If multiple hosts were specified in the connection
         string, any remaining servers will be tried just as if the connection
-        attempt had failed.  The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
+        attempt had failed.
+      </para>
+      <para>
+        If this parameter is set to <literal>prefer-read</literal>, connections
+        where <literal>SHOW transaction_read_only</literal> returns on are
+        preferred. If no such connections can be found, then a connection
+        that allows read-write transactions will be accepted.
+      </para>
+      <para>
+        The default value of this parameter,<literal>any</literal>, regards all
+        connections as acceptable.
       </para>
       </listitem>
     </varlistentry>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index a7e969d7c1..5a693abd56 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -327,7 +327,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1184,7 +1184,8 @@ connectOptions2(PGconn *conn)
 	if (conn->target_session_attrs)
 	{
 		if (strcmp(conn->target_session_attrs, "any") != 0
-			&& strcmp(conn->target_session_attrs, "read-write") != 0)
+			&& strcmp(conn->target_session_attrs, "read-write") != 0
+			&& strcmp(conn->target_session_attrs, "prefer-read") != 0)
 		{
 			conn->status = CONNECTION_BAD;
 			printfPQExpBuffer(&conn->errorMessage,
@@ -2086,8 +2087,23 @@ keep_going:						/* We will come back to here until there is
 					{
 						if (++conn->whichhost >= conn->nconnhost)
 						{
-							conn->whichhost = 0;
-							break;
+							if (conn->read_write_host_index >= 0)
+							{
+								/*
+								 * Go to here means failed to connect to
+								 * read-only servers and now try connect to
+								 * read-write server again. Only under the
+								 * 'prefer-read' scenario will go to here.
+								 */
+								conn->addr_cur = conn->connhost[conn->read_write_host_index].addrlist;
+								conn->whichhost = conn->read_write_host_index;
+								conn->read_write_host_index = -2;
+							}
+							else
+							{
+								conn->whichhost = 0;
+								break;
+							}
 						}
 						conn->addr_cur =
 							conn->connhost[conn->whichhost].addrlist;
@@ -2112,6 +2128,14 @@ keep_going:						/* We will come back to here until there is
 							conn->addr_cur = addr_cur->ai_next;
 							continue;
 						}
+						else if (conn->read_write_host_index >= 0)
+						{
+							conn->addr_cur = conn->connhost[conn->read_write_host_index].addrlist;
+							conn->whichhost = conn->read_write_host_index;
+							conn->read_write_host_index = -2;
+							continue;
+						}
+
 						appendPQExpBuffer(&conn->errorMessage,
 										  libpq_gettext("could not create socket: %s\n"),
 										  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
@@ -2341,6 +2365,14 @@ keep_going:						/* We will come back to here until there is
 						conn->status = CONNECTION_NEEDED;
 						goto keep_going;
 					}
+					else if (conn->read_write_host_index >= 0)
+					{
+						conn->addr_cur = conn->connhost[conn->read_write_host_index].addrlist;
+						conn->whichhost = conn->read_write_host_index;
+						conn->read_write_host_index = -2;
+						conn->status = CONNECTION_NEEDED;
+						goto keep_going;
+					}
 					goto error_return;
 				}
 
@@ -2978,10 +3010,12 @@ keep_going:						/* We will come back to here until there is
 				}
 
 				/*
-				 * If a read-write connection is required, see if we have one.
+				 * If a read-write or prefer-read connection is required, see
+				 * if we have one.
 				 */
 				if (conn->target_session_attrs != NULL &&
-					strcmp(conn->target_session_attrs, "read-write") == 0)
+					(strcmp(conn->target_session_attrs, "read-write") == 0 ||
+					 strcmp(conn->target_session_attrs, "prefer-read") == 0))
 				{
 					/*
 					 * We are yet to make a connection. Save all existing
@@ -3042,10 +3076,12 @@ keep_going:						/* We will come back to here until there is
 			}
 
 			/*
-			 * If a read-write connection is requested check for same.
+			 * If a read-write or prefer-read connection is requested check
+			 * for same.
 			 */
 			if (conn->target_session_attrs != NULL &&
-				strcmp(conn->target_session_attrs, "read-write") == 0)
+				(strcmp(conn->target_session_attrs, "read-write") == 0 ||
+				 strcmp(conn->target_session_attrs, "prefer-read") == 0))
 			{
 				if (!saveErrorMessage(conn, &savedMessage))
 					goto error_return;
@@ -3128,53 +3164,134 @@ keep_going:						/* We will come back to here until there is
 					val = PQgetvalue(res, 0, 0);
 					if (strncmp(val, "on", 2) == 0)
 					{
-						const char *displayed_host;
-						const char *displayed_port;
+						if (strcmp(conn->target_session_attrs, "read-write") == 0)
+						{
+							if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+								displayed_host = conn->connhost[conn->whichhost].hostaddr;
+							else
+								displayed_host = conn->connhost[conn->whichhost].host;
+							displayed_port = conn->connhost[conn->whichhost].port;
+							if (displayed_port == NULL || displayed_port[0] == '\0')
+								displayed_port = DEF_PGPORT_STR;
 
-						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-							displayed_host = conn->connhost[conn->whichhost].hostaddr;
-						else
-							displayed_host = conn->connhost[conn->whichhost].host;
-						displayed_port = conn->connhost[conn->whichhost].port;
-						if (displayed_port == NULL || displayed_port[0] == '\0')
-							displayed_port = DEF_PGPORT_STR;
+							PQclear(res);
+							restoreErrorMessage(conn, &savedMessage);
 
-						PQclear(res);
-						restoreErrorMessage(conn, &savedMessage);
+							/* Not writable; close connection. */
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a writable "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+							conn->status = CONNECTION_OK;
+							sendTerminateConn(conn);
+							pqDropConnection(conn, true);
 
-						/* Not writable; close connection. */
-						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
-														"connection to server "
-														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
-						conn->status = CONNECTION_OK;
-						sendTerminateConn(conn);
-						pqDropConnection(conn, true);
+							/* Skip any remaining addresses for this host. */
+							conn->addr_cur = NULL;
+							if (conn->whichhost + 1 < conn->nconnhost)
+							{
+								conn->status = CONNECTION_NEEDED;
+								goto keep_going;
+							}
 
-						/* Skip any remaining addresses for this host. */
-						conn->addr_cur = NULL;
-						if (conn->whichhost + 1 < conn->nconnhost)
+							/* No more addresses to try. So we fail. */
+							goto error_return;
+						}
+						else	/* conn->target_session_attrs is prefer-read */
 						{
-							conn->status = CONNECTION_NEEDED;
+							PQclear(res);
+							termPQExpBuffer(&savedMessage);
+
+							/* We can release the address lists now. */
+							release_all_addrinfo(conn);
+
+							/*
+							 * Finish reading any remaining messages before
+							 * being considered as ready.
+							 */
+							conn->status = CONNECTION_CONSUME;
 							goto keep_going;
 						}
-
-						/* No more addresses to try. So we fail. */
-						goto error_return;
 					}
-					PQclear(res);
-					termPQExpBuffer(&savedMessage);
+					else		/* server support read-write */
+					{
+						if (strcmp(conn->target_session_attrs, "read-write") == 0)
+						{
+							PQclear(res);
+							termPQExpBuffer(&savedMessage);
 
-					/* We can release the address lists now. */
-					release_all_addrinfo(conn);
+							/* We can release the address lists now. */
+							release_all_addrinfo(conn);
 
-					/*
-					 * Finish reading any remaining messages before being
-					 * considered as ready.
-					 */
-					conn->status = CONNECTION_CONSUME;
-					goto keep_going;
+							/*
+							 * Finish reading any remaining messages before
+							 * being considered as ready.
+							 */
+							conn->status = CONNECTION_CONSUME;
+							goto keep_going;
+						}
+						else	/* conn->target_session_attrs is prefer-read */
+						{
+							/* is it the last connection? */
+							if ((conn->whichhost + 1 < conn->nconnhost) &&
+								(conn->read_write_host_index != -2))
+							{
+								if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+									displayed_host = conn->connhost[conn->whichhost].hostaddr;
+								else
+									displayed_host = conn->connhost[conn->whichhost].host;
+								displayed_port = conn->connhost[conn->whichhost].port;
+								if (displayed_port == NULL || displayed_port[0] == '\0')
+									displayed_port = DEF_PGPORT_STR;
+
+								PQclear(res);
+								restoreErrorMessage(conn, &savedMessage);
+
+								/* Not read-only; close connection. */
+								appendPQExpBuffer(&conn->errorMessage,
+												  libpq_gettext("could not make a read-only "
+																"connection to server "
+																"\"%s:%s\"\n"),
+												  displayed_host, displayed_port);
+
+								/*
+								 * Connecting to a writable server, close it
+								 * and try to connect to another one.
+								 */
+								conn->status = CONNECTION_OK;
+								sendTerminateConn(conn);
+								pqDropConnection(conn, true);
+
+								/* Skip any remaining addresses for this host. */
+								conn->addr_cur = NULL;
+
+								conn->status = CONNECTION_NEEDED;
+
+								/* Record read-write host index, if not yet */
+								if (conn->read_write_host_index == -1)
+									conn->read_write_host_index = conn->whichhost;
+
+								goto keep_going;
+							}
+							else	/* No more host to connect, keep this
+									 * connection */
+							{
+								PQclear(res);
+								termPQExpBuffer(&savedMessage);
+
+								/* We can release the address lists now. */
+								release_all_addrinfo(conn);
+
+								/*
+								 * Finish reading any remaining messages
+								 * before being considered as ready.
+								 */
+								conn->status = CONNECTION_CONSUME;
+								goto keep_going;
+							}
+						}
+					}
 				}
 
 				/*
@@ -3393,6 +3510,8 @@ makeEmptyPGconn(void)
 		conn = NULL;
 	}
 
+	conn->read_write_host_index = -1;
+
 	return conn;
 }
 
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 9a586ff25a..39504999b3 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -362,7 +362,7 @@ struct pg_conn
 	char	   *krbsrvname;		/* Kerberos service name */
 #endif
 
-	/* Type of connection to make.  Possible values: any, read-write. */
+	/* Type of connection to make.  Possible values: any, read-write, perfer-read. */
 	char	   *target_session_attrs;
 
 	/* Optional file to write trace info to */
@@ -396,6 +396,7 @@ struct pg_conn
 	int			nconnhost;		/* # of possible hosts */
 	int			whichhost;		/* host we're currently considering */
 	pg_conn_host *connhost;		/* details about each possible host */
+	int 		read_write_host_index; /* index for first read-write host in connhost */
 
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index a0d3e8f357..e994306f28 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 28;
+use Test::More tests => 31;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -117,6 +117,18 @@ test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"any", 0);
 
+# Connect to standby1 in "prefer-read" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1, "prefer-read",
+	0);
+
+# Connect to standby1 in "prefer-read" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"prefer-read", 0);
+
+# Connect to node_master in "prefer-read" mode with only master list.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-read", 0);
+
 note "switching to physical replication slot";
 
 # Switch to using a physical replication slot. We can do this without a new
-- 
2.16.1.windows.4

