On 2021-03-01 17:22, Maxim Orlov wrote:
Hi!

I have primary server on port 55942 and two standbys on 55943 and
55944. Then use connection string like
"postgresql://127.0.0.1:55942,127.0.0.1:55943,127.0.0.1:55944/postgres"
to connect to the servers via psql.

Everything works perfectly no matter how many attempts to connect I do.
But when I stop primary server, very rarely I do get an error
"received invalid response to SSL negotiation" from psql. I got this
error when I tried to make massive connects/disconnects test and it's
unlikely to reproduce manually without thousands of connections
sequentially with no intentional delay in between.

The problem present only on Linux, MacOS works fine.

As far as I understand this particular problem is because of
postgresql gets "zero" (i.e. 0) byte in SSLok in
PQconnectPoll@src/interfaces/libpq/fe-connect.c. This lead to select
"else" branch with described error message. This may be fixed by
handling zero byte as 'E' byte. But I'm not sure if it's good
solution, since I don't know why fe-connect gets an zero byte at all.

I consider it's worth to correct this. This error is rare but it's
really odd to notice this unexpected and wrong behavior.

---
Best regards,
Maxim Orlov.

Correction. Previous patch was wrong. The proper one is here.

---
Best regards,
Maxim Orlov.
From fcc2392b1942d58e1de6749b65589757fb35322f Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.or...@postgrespro.ru>
Date: Thu, 8 Apr 2021 18:12:46 +0300
Subject: [PATCH] WIP-2

---
 src/interfaces/libpq/Makefile           |   3 +
 src/interfaces/libpq/fe-connect.c       |   5 +
 src/interfaces/libpq/t/004-multihost.pl | 143 ++++++++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 src/interfaces/libpq/t/004-multihost.pl

diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile
index 0c4e55b6ad3..aa9328f98b0 100644
--- a/src/interfaces/libpq/Makefile
+++ b/src/interfaces/libpq/Makefile
@@ -120,6 +120,9 @@ install: all installdirs install-lib
 installcheck:
 	$(MAKE) -C test $@
 
+check:
+	$(prove_check)
+
 installdirs: installdirs-lib
 	$(MKDIR_P) '$(DESTDIR)$(includedir)' '$(DESTDIR)$(includedir_internal)' '$(DESTDIR)$(datadir)'
 
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index aa654dd6a8e..8aa017d858e 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -2319,6 +2319,7 @@ keep_going:						/* We will come back to here until there is
 		conn->try_next_addr = false;
 	}
 
+keep_going2:
 	/* Time to advance to next connhost[] entry? */
 	if (conn->try_next_host)
 	{
@@ -3081,6 +3082,10 @@ keep_going:						/* We will come back to here until there is
 						conn->status = CONNECTION_AWAITING_RESPONSE;
 						goto keep_going;
 					}
+					else if (SSLok == '\0')
+					{
+						goto keep_going2;
+					}
 					else
 					{
 						appendPQExpBuffer(&conn->errorMessage,
diff --git a/src/interfaces/libpq/t/004-multihost.pl b/src/interfaces/libpq/t/004-multihost.pl
new file mode 100644
index 00000000000..116f4a82635
--- /dev/null
+++ b/src/interfaces/libpq/t/004-multihost.pl
@@ -0,0 +1,143 @@
+# target_server_type
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 10000;
+use Scalar::Util qw(blessed);
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_master->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_master->backup($backup_name);
+
+# Create streaming standby 1 linking to master
+my $node_standby_1 = get_new_node('standby_1');
+$node_standby_1->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby_1->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_standby_1->start;
+
+# Create streaming standby 2 linking to master
+my $node_standby_2 = get_new_node('standby_2');
+$node_standby_2->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby_2->append_conf('postgresql.conf', "listen_addresses = '$PostgresNode::test_localhost'");
+$node_standby_2->start;
+
+sub get_host_port
+{
+	my $node = shift;
+	return "$PostgresNode::test_localhost:" . $node->port;
+}
+
+sub multiconnstring
+{
+	my $nodes    = shift;
+	my $database = shift || "postgres";
+	my $params   = shift;
+	my $extra    = "";
+	if ($params)
+	{
+		my @cs;
+		while (my ($key, $val) = each %$params)
+		{
+			push @cs, $key . "=" . $val;
+		}
+		$extra = "?" . join("&", @cs);
+	}
+	my $str =
+		"postgresql://"
+		. join(",", map({ get_host_port($_) } @$nodes))
+		. "/$database$extra";
+
+	return $str;
+}
+
+#
+# Copied from PosgresNode.pm passing explicit connect-string instead of
+# constructed from object
+#
+
+sub psql2
+{
+	# We expect dbname to be part of connstr
+	my ($connstr, $sql, %params) = @_;
+
+	my $stdout            = $params{stdout};
+	my $stderr            = $params{stderr};
+	my @psql_params       = ('psql', '-XAtq', '-d', $connstr, '-f', '-');
+
+	# Allocate temporary ones to capture them so we
+	# can return them. Otherwise we won't redirect them at all.
+	if (!defined($stdout))
+	{
+		my $temp_stdout = "";
+		$stdout = \$temp_stdout;
+	}
+	if (!defined($stderr))
+	{
+		my $temp_stderr = "";
+		$stderr = \$temp_stderr;
+	}
+
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+	$$stderr = "" if ref($stderr);
+
+	my $ret;
+
+	# Run psql and capture any possible exceptions.  If the exception is
+	# because of a timeout and the caller requested to handle that, just return
+	# and set the flag.  Otherwise, and for any other exception, rethrow.
+	#
+	# For background, see
+	# http://search.cpan.org/~ether/Try-Tiny-0.24/lib/Try/Tiny.pm
+	do
+	{
+		local $@;
+		eval {
+			my @ipcrun_opts = (\@psql_params, '<', \$sql);
+			push @ipcrun_opts, '>',  $stdout if defined $stdout;
+			push @ipcrun_opts, '2>', $stderr if defined $stderr;
+
+			IPC::Run::run @ipcrun_opts;
+			$ret = $?;
+		};
+	};
+
+	return ($ret, $$stdout, $$stderr);
+}
+
+# returns host:port retrieved via \conninfo
+sub psql_conninfo
+{
+	my ($connstr) = shift;
+	my ($retcode, $stdout, $stderr) = psql2($connstr, '\conninfo');
+	if ($retcode == 0 && $stdout =~ /on host "([^"]*)" at port "([^"]*)"/s)
+	{
+		return "$1:$2";
+	}
+	else
+	{
+		return "STDOUT:$stdout\nSTDERR:$stderr";
+	}
+}
+
+my $conninfo;
+my $expected;
+my $connnstr;
+
+# Test 10 - one of standbys is not available
+$node_standby_1->stop();
+# Test 10.1
+for my $i (1 .. 10000)
+{
+	$conninfo =
+		psql_conninfo(
+			multiconnstring([ $node_standby_1, $node_master, $node_standby_2 ]));
+	is($conninfo, get_host_port($node_master), "first node is unavailable");
+}
-- 
2.24.3 (Apple Git-128)

Reply via email to