From 401d97a1b116839139685ed9a3ee22291bc44ea3 Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Thu, 17 Jul 2025 08:45:45 -0700
Subject: [PATCH 2/2] WIP: add a "drain pending" concept to SSL/GSS

---
 src/interfaces/libpq/fe-misc.c           |  51 ++++++++++-
 src/interfaces/libpq/fe-secure-gssapi.c  |  27 ++++++
 src/interfaces/libpq/fe-secure-openssl.c | 109 +++++++++++++++++++++++
 src/interfaces/libpq/fe-secure.c         |  51 ++++++++++-
 src/interfaces/libpq/libpq-int.h         |   7 ++
 5 files changed, 242 insertions(+), 3 deletions(-)

diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 434216ff89f..49cf32e1bb7 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -55,6 +55,7 @@ static int	pqPutMsgBytes(const void *buf, size_t len, PGconn *conn);
 static int	pqSendSome(PGconn *conn, int len);
 static int	pqSocketCheck(PGconn *conn, int forRead, int forWrite,
 						  pg_usec_time_t end_time);
+static int	pqReadData_internal(PGconn *conn);
 
 /*
  * PQlibVersion: return the libpq version number
@@ -593,6 +594,13 @@ pqPutMsgEnd(PGconn *conn)
 
 /* ----------
  * pqReadData: read more data, if any is available
+ *
+ * Upon a successful return, callers may assume that either 1) all available
+ * bytes have been consumed from the socket, or 2) the socket is still marked
+ * readable by the OS. (In other words: after a successful pqReadData, it's safe
+ * to tell a client to poll for readable bytes on the socket without any further
+ * draining of the SSL/GSS transport buffers.)
+ *
  * Possible return values:
  *	 1: successfully loaded at least one more byte
  *	 0: no data is presently available, but no error detected
@@ -605,8 +613,8 @@ pqPutMsgEnd(PGconn *conn)
 int
 pqReadData(PGconn *conn)
 {
-	int			someread = 0;
-	int			nread;
+	int			available;
+	bool		pending;
 
 	if (conn->sock == PGINVALID_SOCKET)
 	{
@@ -614,6 +622,45 @@ pqReadData(PGconn *conn)
 		return -1;
 	}
 
+	available = pqReadData_internal(conn);
+	if (available < 0)
+		return -1;
+
+	/*
+	 * Make sure there are no bytes stuck in layers between conn->inBuffer and
+	 * the socket, to make it safe for clients to poll on PQsocket(). See
+	 * pqsecure_drain_pending's documentation for details.
+	 */
+	pending = pqsecure_read_is_pending(conn);
+
+	if (available && pending)
+	{
+		if (pqsecure_drain_pending(conn))
+			return -1;
+	}
+	else if (!available)
+	{
+		/*
+		 * If we're not returning any bytes from the underlying transport,
+		 * that must imply there aren't any in the transport buffer...
+		 */
+		Assert(!pending);
+	}
+
+	return available;
+}
+
+/*
+ * Workhorse for pqReadData(). It's kept separate from the
+ * pqsecure_drain_pending() logic to avoid adding to this function's goto
+ * complexity.
+ */
+static int
+pqReadData_internal(PGconn *conn)
+{
+	int			someread = 0;
+	int			nread;
+
 	/* Left-justify any data in the buffer to make room */
 	if (conn->inStart < conn->inEnd)
 	{
diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c
index 7c88e64cfd2..329c2559b88 100644
--- a/src/interfaces/libpq/fe-secure-gssapi.c
+++ b/src/interfaces/libpq/fe-secure-gssapi.c
@@ -475,6 +475,33 @@ pg_GSS_read_is_pending(PGconn *conn)
 	return PqGSSResultLength > PqGSSResultNext;
 }
 
+int
+pg_GSS_drain_pending(PGconn *conn)
+{
+	int			pending;
+
+	/* Figure out how many bytes to take off the connection. */
+	Assert(PqGSSResultLength >= PqGSSResultNext);
+	pending = PqGSSResultLength - PqGSSResultNext;
+
+	if (!pending)
+	{
+		/* Nothing to do. */
+		return 0;
+	}
+
+	/* Expand the input buffer if necessary. */
+	if (pqCheckInBufferSpace(conn->inEnd + (size_t) pending, conn))
+		return -1;				/* errorMessage already set */
+
+	/* Now read the buffered data. */
+	memcpy(conn->inBuffer + conn->inEnd, PqGSSResultBuffer + PqGSSResultNext, pending);
+	conn->inEnd += pending;
+	PqGSSResultNext += pending;
+
+	return 0;
+}
+
 /*
  * Negotiate GSSAPI transport for a connection.  When complete, returns
  * PGRES_POLLING_OK.  Will return PGRES_POLLING_READING or
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index 8f975561e51..b95e229bd08 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -236,6 +236,115 @@ pgtls_read_is_pending(PGconn *conn)
 	return SSL_pending(conn->ssl) > 0;
 }
 
+/*
+ * Helper callback for use with ERR_print_errors_cb(). This appends the raw
+ * error queue to the provided PQExpBuffer, one entry per line.
+ *
+ * Note that this is not pretty output; it's meant for debugging.
+ */
+static int
+append_error_queue(const char *str, size_t len, void *u)
+{
+	PQExpBuffer buf = u;
+
+	appendBinaryPQExpBuffer(buf, str, len);
+	appendPQExpBufferChar(buf, '\n');
+
+	return 0;
+}
+
+int
+pgtls_drain_pending(PGconn *conn)
+{
+	int			pending;
+	size_t		drained;
+
+	/*
+	 * OpenSSL readahead is documented to break SSL_pending(). Plus, we can't
+	 * afford to have OpenSSL take bytes off the socket without processing
+	 * them; that breaks the postconditions for pqsecure_drain_pending().
+	 */
+	Assert(!SSL_get_read_ahead(conn->ssl));
+
+	/* Figure out how many bytes to take off the connection. */
+	pending = SSL_pending(conn->ssl);
+
+	if (!pending)
+	{
+		/* Nothing to do. */
+		return 0;
+	}
+	else if (pending < 0)
+	{
+		/* Shouldn't be possible, but don't let it mess up the math below. */
+		Assert(false);
+		libpq_append_conn_error(conn, "OpenSSL reports negative bytes pending");
+		return -1;
+	}
+	else if (pending == INT_MAX)
+	{
+		/*
+		 * If we ever found a legitimate way to hit this, we'd need to loop
+		 * around to call SSL_pending() again. Throw an error rather than
+		 * complicate the code in that way, because SSL_read() should be
+		 * bounded to the size of a single TLS record, and conn->inBuffer
+		 * can't currently go past INT_MAX in size anyway.
+		 */
+		libpq_append_conn_error(conn, "OpenSSL reports INT_MAX bytes pending");
+		return -1;
+	}
+
+	/* Expand the input buffer if necessary. */
+	if (pqCheckInBufferSpace(conn->inEnd + (size_t) pending, conn))
+		return -1;				/* errorMessage already set */
+
+	/*
+	 * Now read the buffered data.
+	 *
+	 * Don't defer to pgtls_read(); OpenSSL should guarantee that pending data
+	 * comes off in a single call, and we don't want to use the more
+	 * complicated read-loop behavior. We still have to manage the error
+	 * queue.
+	 */
+	ERR_clear_error();
+	if (!SSL_read_ex(conn->ssl, conn->inBuffer + conn->inEnd, pending, &drained))
+	{
+		int			err = SSL_get_error(conn->ssl, 0);
+
+		/*
+		 * Something is very wrong. Report the error code and the entirety of
+		 * the error queue without any attempt at interpretation. Probably not
+		 * worth complicating things for the sake of translation, either.
+		 */
+		appendPQExpBuffer(&conn->errorMessage,
+						  "unexpected error code %d while draining SSL buffer; ",
+						  err);
+
+		if (ERR_peek_error())
+		{
+			appendPQExpBufferStr(&conn->errorMessage, "error queue follows:\n");
+			ERR_print_errors_cb(append_error_queue, &conn->errorMessage);
+		}
+		else
+			appendPQExpBufferStr(&conn->errorMessage,
+								 "no error queue provided\n");
+
+		return -1;
+	}
+
+	/* Final consistency check. */
+	if (drained != pending)
+	{
+		libpq_append_conn_error(conn,
+								"drained only %zu of %d pending bytes in SSL buffer",
+								drained, pending);
+		return -1;
+	}
+
+	conn->inEnd += pending;
+	return 0;
+}
+
 ssize_t
 pgtls_write(PGconn *conn, const void *ptr, size_t len)
 {
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 94c97ec26fb..a522724373f 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -244,7 +244,9 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
 }
 
 /*
- * Returns true if there are any bytes available in the transport buffer.
+ * Returns true if there are any bytes available in the transport buffer. See
+ * pqsecure_drain_pending() for a more complete discussion of the concepts
+ * involved.
  */
 bool
 pqsecure_read_is_pending(PGconn *conn)
@@ -262,6 +264,53 @@ pqsecure_read_is_pending(PGconn *conn)
 	return 0;
 }
 
+/*
+ * Drains any transport data that is already buffered in userspace and adds it
+ * to conn->inBuffer, enlarging inBuffer if necessary. The drain fails if
+ * inBuffer cannot be made to hold all available transport data.
+ *
+ * This operation is necessary to prevent deadlock, due to a layering violation
+ * designed into our asynchronous client API: pqReadData() and all the parsing
+ * routines above it receive data from the SSL/GSS transport buffer, but clients
+ * poll on the raw PQsocket() handle.
+ *
+ * If the
+ *
+ * Implementations should not attempt to read any more data from the socket
+ * while draining the transport buffer. After a successful return,
+ * pqsecure_bytes_pending() must be zero.
+ */
+int
+pqsecure_drain_pending(PGconn *conn)
+{
+	int			ret;
+
+#ifdef USE_SSL
+	if (conn->ssl_in_use)
+	{
+		ret = pgtls_drain_pending(conn);
+	}
+	else
+#endif
+#ifdef ENABLE_GSS
+	if (conn->gssenc)
+	{
+		ret = pg_GSS_drain_pending(conn);
+	}
+	else
+#endif
+	{
+		/* Plaintext connections have no transport buffer. */
+		ret = 0;
+	}
+
+	/* Keep the implementation honest. */
+	if (ret == 0)
+		Assert(!pqsecure_read_is_pending(conn));
+
+	return ret;
+}
+
 /*
  *	Write data to a secure connection.
  *
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 51bc2d3b740..f89db453527 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -810,6 +810,7 @@ extern PostgresPollingStatusType pqsecure_open_client(PGconn *);
 extern void pqsecure_close(PGconn *);
 extern ssize_t pqsecure_read(PGconn *, void *ptr, size_t len);
 extern bool pqsecure_read_is_pending(PGconn *);
+extern int	pqsecure_drain_pending(PGconn *);
 extern ssize_t pqsecure_write(PGconn *, const void *ptr, size_t len);
 extern ssize_t pqsecure_raw_read(PGconn *, void *ptr, size_t len);
 extern ssize_t pqsecure_raw_write(PGconn *, const void *ptr, size_t len);
@@ -850,6 +851,11 @@ extern ssize_t pgtls_read(PGconn *conn, void *ptr, size_t len);
  */
 extern bool pgtls_read_is_pending(PGconn *conn);
 
+/*
+ *	Reads any data waiting in the SSL read buffer into the connection buffer.
+ */
+extern int	pgtls_drain_pending(PGconn *conn);
+
 /*
  *	Write data to a secure connection.
  *
@@ -897,6 +903,7 @@ extern PostgresPollingStatusType pqsecure_open_gss(PGconn *conn);
 extern ssize_t pg_GSS_write(PGconn *conn, const void *ptr, size_t len);
 extern ssize_t pg_GSS_read(PGconn *conn, void *ptr, size_t len);
 extern bool pg_GSS_read_is_pending(PGconn *conn);
+extern int	pg_GSS_drain_pending(PGconn *conn);
 #endif
 
 /* === in fe-trace.c === */
-- 
2.34.1

