From ae87ca271dc8bd27a0d74ba3afecad2c54e86a90 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@enterprisedb.com>
Date: Wed, 11 Apr 2018 11:30:36 +1200
Subject: [PATCH] Replace PostmasterIsAlive() calls with WL_POSTMASTER_DEATH
 checks.

In several places we had a loop with a WaitEventSet, but we also made an
explicit PostmasterIsAlive() call.  That caused contention in the kernel of
some operating systems, and was mostly redundant because we already receive
notification from the WaitEventSet system when the postmaster dies.
---
 src/backend/postmaster/pgarch.c       |  4 ++-
 src/backend/replication/syncrep.c     | 18 ++++++++------
 src/backend/replication/walreceiver.c | 16 ++++++------
 src/backend/replication/walsender.c   | 47 ++++++++++++++---------------------
 4 files changed, 39 insertions(+), 46 deletions(-)

diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 885e85ad8af..e43ac4d4f47 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -393,6 +393,8 @@ pgarch_MainLoop(void)
 							   WAIT_EVENT_ARCHIVER_MAIN);
 				if (rc & WL_TIMEOUT)
 					wakened = true;
+				if (rc & WL_POSTMASTER_DEATH)
+					time_to_stop = true;
 			}
 			else
 				wakened = true;
@@ -403,7 +405,7 @@ pgarch_MainLoop(void)
 		 * or after completing one more archiving cycle after receiving
 		 * SIGUSR2.
 		 */
-	} while (PostmasterIsAlive() && !time_to_stop);
+	} while (!time_to_stop);
 }
 
 /*
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index 75d26817192..7fc784f5916 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -214,6 +214,8 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 	 */
 	for (;;)
 	{
+		int			rc;
+
 		/* Must reset the latch before testing state. */
 		ResetLatch(MyLatch);
 
@@ -266,25 +268,25 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 			break;
 		}
 
+		/*
+		 * Wait on latch.  Any condition that should wake us up will set the
+		 * latch, so no need for timeout.
+		 */
+		rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1,
+					   WAIT_EVENT_SYNC_REP);
+
 		/*
 		 * If the postmaster dies, we'll probably never get an
 		 * acknowledgement, because all the wal sender processes will exit. So
 		 * just bail out.
 		 */
-		if (!PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 		{
 			ProcDiePending = true;
 			whereToSendOutput = DestNone;
 			SyncRepCancelWait();
 			break;
 		}
-
-		/*
-		 * Wait on latch.  Any condition that should wake us up will set the
-		 * latch, so no need for timeout.
-		 */
-		WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1,
-				  WAIT_EVENT_SYNC_REP);
 	}
 
 	/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b9dab322d6b..b097b724fce 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -681,14 +681,9 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 	WakeupRecovery();
 	for (;;)
 	{
-		ResetLatch(walrcv->latch);
+		int		rc;
 
-		/*
-		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.
-		 */
-		if (!PostmasterIsAlive())
-			exit(1);
+		ResetLatch(walrcv->latch);
 
 		ProcessWalRcvInterrupts();
 
@@ -716,8 +711,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 		}
 		SpinLockRelease(&walrcv->mutex);
 
-		WaitLatch(walrcv->latch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
-				  WAIT_EVENT_WAL_RECEIVER_WAIT_START);
+		rc = WaitLatch(walrcv->latch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
+					   WAIT_EVENT_WAL_RECEIVER_WAIT_START);
+
+		if (rc & WL_POSTMASTER_DEATH)
+			exit(1);
 	}
 
 	if (update_process_title)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 642e859439f..51aa26627a1 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1186,6 +1186,7 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
 	/* If we have pending write here, go to slow path */
 	for (;;)
 	{
+		int			rc;
 		int			wakeEvents;
 		long		sleeptime;
 
@@ -1209,15 +1210,11 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
 			WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE | WL_TIMEOUT;
 
 		/* Sleep until something happens or we time out */
-		WaitLatchOrSocket(MyLatch, wakeEvents,
-						  MyProcPort->sock, sleeptime,
-						  WAIT_EVENT_WAL_SENDER_WRITE_DATA);
+		rc = WaitLatchOrSocket(MyLatch, wakeEvents,
+							   MyProcPort->sock, sleeptime,
+							   WAIT_EVENT_WAL_SENDER_WRITE_DATA);
 
-		/*
-		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.
-		 */
-		if (!PostmasterIsAlive())
+		if (rc & WL_POSTMASTER_DEATH)
 			exit(1);
 
 		/* Clear any already-pending wakeups */
@@ -1297,16 +1294,10 @@ WalSndWaitForWal(XLogRecPtr loc)
 
 	for (;;)
 	{
+		int			rc;
 		long		sleeptime;
 		TimestampTz now;
 
-		/*
-		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.
-		 */
-		if (!PostmasterIsAlive())
-			exit(1);
-
 		/* Clear any already-pending wakeups */
 		ResetLatch(MyLatch);
 
@@ -1406,9 +1397,12 @@ WalSndWaitForWal(XLogRecPtr loc)
 		if (pq_is_send_pending())
 			wakeEvents |= WL_SOCKET_WRITEABLE;
 
-		WaitLatchOrSocket(MyLatch, wakeEvents,
-						  MyProcPort->sock, sleeptime,
-						  WAIT_EVENT_WAL_SENDER_WAIT_WAL);
+		rc = WaitLatchOrSocket(MyLatch, wakeEvents,
+							   MyProcPort->sock, sleeptime,
+							   WAIT_EVENT_WAL_SENDER_WAIT_WAL);
+
+		if (rc & WL_POSTMASTER_DEATH)
+			exit(1);
 	}
 
 	/* reactivate latch so WalSndLoop knows to continue */
@@ -2108,13 +2102,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
 	{
 		TimestampTz now;
 
-		/*
-		 * Emergency bailout if postmaster has died.  This is to avoid the
-		 * necessity for manual cleanup of all postmaster children.
-		 */
-		if (!PostmasterIsAlive())
-			exit(1);
-
 		/* Clear any already-pending wakeups */
 		ResetLatch(MyLatch);
 
@@ -2203,6 +2190,7 @@ WalSndLoop(WalSndSendDataCallback send_data)
 		 */
 		if ((WalSndCaughtUp && !streamingDoneSending) || pq_is_send_pending())
 		{
+			int			rc;
 			long		sleeptime;
 			int			wakeEvents;
 
@@ -2215,9 +2203,12 @@ WalSndLoop(WalSndSendDataCallback send_data)
 				wakeEvents |= WL_SOCKET_WRITEABLE;
 
 			/* Sleep until something happens or we time out */
-			WaitLatchOrSocket(MyLatch, wakeEvents,
-							  MyProcPort->sock, sleeptime,
-							  WAIT_EVENT_WAL_SENDER_MAIN);
+			rc = WaitLatchOrSocket(MyLatch, wakeEvents,
+								   MyProcPort->sock, sleeptime,
+								   WAIT_EVENT_WAL_SENDER_MAIN);
+
+			if (rc & WL_POSTMASTER_DEATH)
+				exit(1);
 		}
 	}
 	return;
-- 
2.16.2

