Hi!

I have primary server on port 55942 and two standbys on 55943 and 55944. Then use connection string like "postgresql://127.0.0.1:55942,127.0.0.1:55943,127.0.0.1:55944/postgres" to connect to the servers via psql.

Everything works perfectly no matter how many attempts to connect I do.
But when I stop primary server, very rarely I do get an error "received invalid response to SSL negotiation" from psql. I got this error when I tried to make massive connects/disconnects test and it's unlikely to reproduce manually without thousands of connections sequentially with no intentional delay in between.

The problem present only on Linux, MacOS works fine.

As far as I understand this particular problem is because of postgresql gets "zero" (i.e. 0) byte in SSLok in PQconnectPoll@src/interfaces/libpq/fe-connect.c. This lead to select "else" branch with described error message. This may be fixed by handling zero byte as 'E' byte. But I'm not sure if it's good solution, since I don't know why fe-connect gets an zero byte at all.

I consider it's worth to correct this. This error is rare but it's really odd to notice this unexpected and wrong behavior.

---
Best regards,
Maxim Orlov.
From 860f14c99ec70fcea58897c97e218b8a54286a39 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.or...@postgrespro.ru>
Date: Mon, 1 Mar 2021 16:47:41 +0300
Subject: [PATCH] WIP

---
 src/interfaces/libpq/Makefile           |   3 +
 src/interfaces/libpq/fe-connect.c       |   2 +-
 src/interfaces/libpq/t/004-multihost.pl | 143 ++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 src/interfaces/libpq/t/004-multihost.pl

diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile
index f74677eaf9b..98fd1d694ef 100644
--- a/src/interfaces/libpq/Makefile
+++ b/src/interfaces/libpq/Makefile
@@ -120,6 +120,9 @@ install: all installdirs install-lib
 installcheck:
 	$(MAKE) -C test $@
 
+check:
+	$(prove_check)
+
 installdirs: installdirs-lib
 	$(MKDIR_P) '$(DESTDIR)$(includedir)' '$(DESTDIR)$(includedir_internal)' '$(DESTDIR)$(datadir)'
 
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index db71fea169c..43c14a4433a 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -3023,7 +3023,7 @@ keep_going:						/* We will come back to here until there is
 						conn->status = CONNECTION_MADE;
 						return PGRES_POLLING_WRITING;
 					}
-					else if (SSLok == 'E')
+					else if (SSLok == 'E' || SSLok == '\0')
 					{
 						/*
 						 * Server failure of some sort, such as failure to
diff --git a/src/interfaces/libpq/t/004-multihost.pl b/src/interfaces/libpq/t/004-multihost.pl
new file mode 100644
index 00000000000..116f4a82635
--- /dev/null
+++ b/src/interfaces/libpq/t/004-multihost.pl
@@ -0,0 +1,143 @@
+# target_server_type
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 10000;
+use Scalar::Util qw(blessed);
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_master->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_master->backup($backup_name);
+
+# Create streaming standby 1 linking to master
+my $node_standby_1 = get_new_node('standby_1');
+$node_standby_1->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby_1->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_standby_1->start;
+
+# Create streaming standby 2 linking to master
+my $node_standby_2 = get_new_node('standby_2');
+$node_standby_2->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby_2->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_standby_2->start;
+
+sub get_host_port
+{
+	my $node = shift;
+	return "$PostgresNode::test_localhost:" . $node->port;
+}
+
+sub multiconnstring
+{
+	my $nodes    = shift;
+	my $database = shift || "postgres";
+	my $params   = shift;
+	my $extra    = "";
+	if ($params)
+	{
+		my @cs;
+		while (my ($key, $val) = each %$params)
+		{
+			push @cs, $key . "=" . $val;
+		}
+		$extra = "?" . join("&", @cs);
+	}
+	my $str =
+		"postgresql://"
+		. join(",", map({ get_host_port($_) } @$nodes))
+		. "/$database$extra";
+
+	return $str;
+}
+
+#
+# Copied from PosgresNode.pm passing explicit connect-string instead of
+# constructed from object
+#
+
+sub psql2
+{
+	# We expect dbname to be part of connstr
+	my ($connstr, $sql, %params) = @_;
+
+	my $stdout            = $params{stdout};
+	my $stderr            = $params{stderr};
+	my @psql_params       = ('psql', '-XAtq', '-d', $connstr, '-f', '-');
+
+	# Allocate temporary ones to capture them so we
+	# can return them. Otherwise we won't redirect them at all.
+	if (!defined($stdout))
+	{
+		my $temp_stdout = "";
+		$stdout = \$temp_stdout;
+	}
+	if (!defined($stderr))
+	{
+		my $temp_stderr = "";
+		$stderr = \$temp_stderr;
+	}
+
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+	$$stderr = "" if ref($stderr);
+
+	my $ret;
+
+	# Run psql and capture any possible exceptions.  If the exception is
+	# because of a timeout and the caller requested to handle that, just return
+	# and set the flag.  Otherwise, and for any other exception, rethrow.
+	#
+	# For background, see
+	# http://search.cpan.org/~ether/Try-Tiny-0.24/lib/Try/Tiny.pm
+	do
+	{
+		local $@;
+		eval {
+			my @ipcrun_opts = (\@psql_params, '<', \$sql);
+			push @ipcrun_opts, '>',  $stdout if defined $stdout;
+			push @ipcrun_opts, '2>', $stderr if defined $stderr;
+
+			IPC::Run::run @ipcrun_opts;
+			$ret = $?;
+		};
+	};
+
+	return ($ret, $$stdout, $$stderr);
+}
+
+# returns host:port retrieved via \conninfo
+sub psql_conninfo
+{
+	my ($connstr) = shift;
+	my ($retcode, $stdout, $stderr) = psql2($connstr, '\conninfo');
+	if ($retcode == 0 && $stdout =~ /on host "([^"]*)" at port "([^"]*)"/s)
+	{
+		return "$1:$2";
+	}
+	else
+	{
+		return "STDOUT:$stdout\nSTDERR:$stderr";
+	}
+}
+
+my $conninfo;
+my $expected;
+my $connnstr;
+
+# Test 10 - one of standbys is not available
+$node_standby_1->stop();
+# Test 10.1
+for my $i (1 .. 10000)
+{
+	$conninfo =
+		psql_conninfo(
+			multiconnstring([ $node_standby_1, $node_master, $node_standby_2 ]));
+	is($conninfo, get_host_port($node_master), "first node is unavailable");
+}
-- 
2.17.1

Reply via email to