On Sun, Aug 9, 2020 at 11:48 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> Here are some more experimental patches to reduce system calls.
>
> 0001 skips sending signals when the recipient definitely isn't
> waiting, using a new flag-and-memory-barrier dance.  This seems to
> skip around 12% of the kill() calls for "make check", and probably
> helps with some replication configurations that do a lot of
> signalling.  Patch by Andres (with a small memory barrier adjustment
> by me).
>
> 0002 gets rid of the latch self-pipe on Linux systems.
>
> 0003 does the same on *BSD/macOS systems.

Here's a rebase over the recent signal handler/mask reorganisation.

Some thoughts, on looking at this again after a while:

1.  It's a bit clunky that pqinitmask() takes a new argument to say
whether SIGURG should be blocked; that's because the knowledge of
which latch implementation we're using is private to latch.c, and only
the epoll version needs to block it.  I wonder how to make that
tidier.
2.  It's a bit weird to have UnBlockSig (SIGURG remains blocked for
epoll builds) and UnBlockAllSig (SIGURG is also unblocked).  Maybe the
naming is confusing.
3.  Maybe it's strange to continue to use overloaded SIGUSR1 on
non-epoll, non-kqueue systems; perhaps we should use SIGURG
everywhere.
4.  As a nano-optimisation, SetLatch() on a latch the current process
owns might as well use raise(SIGURG) rather than kill().  This is
necessary to close races when SetLatch(MyLatch) runs in a signal
handler.  In other words, although this patch uses signal blocking to
close the race when other processes call SetLatch() and send us
SIGURG, there's still a race if, say, SIGINT is sent to the
checkpointer and it sets its own latch from its SIGINT handler
function; in the user context it may be in WaitEventSetWait() having
just seen latch->is_set == false, and now be about to enter
epoll_pwait()/kevent() after the signal handler returns, so we need to
give it a reason not to go to sleep.

By way of motivation for removing the self-pipe, and where possible
also the signal handler, here is a trace of the WAL writer handling
three requests to write data, on a FreeBSD system, with the patch:

kevent(9,0x0,0,{ SIGURG,... },1,{ 0.200000000 }) = 1 (0x1)
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0\0\M-/\^\"...,8192,0xaf0000) = 8192 (0x2000)
kevent(9,0x0,0,{ SIGURG,... },1,{ 0.200000000 }) = 1 (0x1)
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0 \M-/\^\\0"...,8192,0xaf2000) = 8192 (0x2000)
kevent(9,0x0,0,{ SIGURG,... },1,{ 0.200000000 }) = 1 (0x1)
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0`\M-/\^\\0"...,8192,0xaf6000) = 8192 (0x2000)

Here is the same thing on unpatched master:

kevent(11,0x0,0,0x801c195b0,1,{ 0.200000000 })   ERR#4 'Interrupted system call'
SIGNAL 30 (SIGUSR1) code=SI_USER pid=66575 uid=1001
sigprocmask(SIG_SETMASK,{ SIGUSR1 },0x0)         = 0 (0x0)
write(10,"\0",1)                                 = 1 (0x1)
sigreturn(0x7fffffffc880)                        EJUSTRETURN
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0`\M-w)\0\0"...,8192,0xf76000) = 8192 (0x2000)
kevent(11,0x0,0,{ 9,EVFILT_READ,0x0,0,0x1,0x801c19580 },1,{
0.200000000 }) = 1 (0x1)
read(9,"\0",16)                                  = 1 (0x1)
kevent(11,0x0,0,0x801c195b0,1,{ 0.200000000 })   ERR#4 'Interrupted system call'
SIGNAL 30 (SIGUSR1) code=SI_USER pid=66575 uid=1001
sigprocmask(SIG_SETMASK,{ SIGUSR1 },0x0)         = 0 (0x0)
write(10,"\0",1)                                 = 1 (0x1)
sigreturn(0x7fffffffc880)                        EJUSTRETURN
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0 \M-y)\0\0"...,8192,0xf92000) = 8192 (0x2000)
kevent(11,0x0,0,{ 9,EVFILT_READ,0x0,0,0x1,0x801c19580 },1,{
0.200000000 }) = 1 (0x1)
SIGNAL 30 (SIGUSR1) code=SI_USER pid=66575 uid=1001
sigprocmask(SIG_SETMASK,{ SIGUSR1 },0x0)         = 0 (0x0)
write(10,"\0",1)                                 = 1 (0x1)
sigreturn(0x7fffffffc880)                        EJUSTRETURN
read(9,"\0\0",16)                                = 2 (0x2)
kevent(11,0x0,0,0x801c195b0,1,{ 0.200000000 })   ERR#4 'Interrupted system call'
SIGNAL 30 (SIGUSR1) code=SI_USER pid=66575 uid=1001
sigprocmask(SIG_SETMASK,{ SIGUSR1 },0x0)         = 0 (0x0)
write(10,"\0",1)                                 = 1 (0x1)
sigreturn(0x7fffffffc880)                        EJUSTRETURN
pwrite(4,"\b\M-Q\^E\0\^A\0\0\0\0\0\M-z)\0"...,8192,0xfa0000) = 8192 (0x2000)
kevent(11,0x0,0,{ 9,EVFILT_READ,0x0,0,0x1,0x801c19580 },1,{
0.200000000 }) = 1 (0x1)
read(9,"\0",16)                                  = 1 (0x1)

The improvement isn't quite as good on Linux, because as far as I can
tell you still have to have an (empty) signal handler installed and it
runs (can we find a way to avoid that?), but you still get to skip all
the pipe manipulation.
From 8dd1ae41ac3f2c8b378dfad64bbfe94eb41d39ab Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 8 Aug 2020 15:08:09 +1200
Subject: [PATCH v2 1/4] Optimize latches to send fewer signals.

Don't send signals to processes that aren't currently sleeping.

Author: Andres Freund <and...@anarazel.de>
Discussion: https://postgr.es/m/ca+hukgjjxpdpzbe0a3hyuywbvazuc89yx3jk9rfzgfv_khu...@mail.gmail.com
---
 src/backend/storage/ipc/latch.c | 26 ++++++++++++++++++++++++++
 src/include/storage/latch.h     |  1 +
 2 files changed, 27 insertions(+)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 24d44c982d..f4768b79c2 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -274,6 +274,7 @@ void
 InitLatch(Latch *latch)
 {
 	latch->is_set = false;
+	latch->maybe_sleeping = false;
 	latch->owner_pid = MyProcPid;
 	latch->is_shared = false;
 
@@ -321,6 +322,7 @@ InitSharedLatch(Latch *latch)
 #endif
 
 	latch->is_set = false;
+	latch->maybe_sleeping = false;
 	latch->owner_pid = 0;
 	latch->is_shared = true;
 }
@@ -523,6 +525,10 @@ SetLatch(Latch *latch)
 
 	latch->is_set = true;
 
+	pg_memory_barrier();
+	if (!latch->maybe_sleeping)
+		return;
+
 #ifndef WIN32
 
 	/*
@@ -589,6 +595,7 @@ ResetLatch(Latch *latch)
 {
 	/* Only the owner should reset the latch */
 	Assert(latch->owner_pid == MyProcPid);
+	Assert(latch->maybe_sleeping == false);
 
 	latch->is_set = false;
 
@@ -1289,6 +1296,14 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 		 * ordering, so that we cannot miss seeing is_set if a notification
 		 * has already been queued.
 		 */
+		if (set->latch && !set->latch->is_set)
+		{
+			/* about to sleep on a latch */
+			set->latch->maybe_sleeping = true;
+			pg_memory_barrier();
+			/* and recheck */
+		}
+
 		if (set->latch && set->latch->is_set)
 		{
 			occurred_events->fd = PGINVALID_SOCKET;
@@ -1299,6 +1314,9 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 			occurred_events++;
 			returned_events++;
 
+			/* could have been set above */
+			set->latch->maybe_sleeping = false;
+
 			break;
 		}
 
@@ -1310,6 +1328,12 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 		rc = WaitEventSetWaitBlock(set, cur_timeout,
 								   occurred_events, nevents);
 
+		if (set->latch)
+		{
+			Assert(set->latch->maybe_sleeping);
+			set->latch->maybe_sleeping = false;
+		}
+
 		if (rc == -1)
 			break;				/* timeout occurred */
 		else
@@ -1399,6 +1423,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 		if (cur_event->events == WL_LATCH_SET &&
 			cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
 		{
+			Assert(set->latch);
+
 			/* There's data in the self-pipe, clear it. */
 			drainSelfPipe();
 
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index 7c742021fb..d0da7e5d31 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -110,6 +110,7 @@
 typedef struct Latch
 {
 	sig_atomic_t is_set;
+	sig_atomic_t maybe_sleeping;
 	bool		is_shared;
 	int			owner_pid;
 #ifdef WIN32
-- 
2.20.1

From 70fbca0bba6b31faa7390bdbb188b74de7153cb6 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 8 Aug 2020 16:32:22 +1200
Subject: [PATCH v2 2/4] Remove self-pipe in WAIT_USE_EPOLL builds.

Use SIGURG instead of SIGUSR1 for latch wake-ups, and unblock it only
while in epoll_pwait() to avoid the race traditionally solved by the
self-pipe trick.

Discussion: https://postgr.es/m/ca+hukgjjxpdpzbe0a3hyuywbvazuc89yx3jk9rfzgfv_khu...@mail.gmail.com
---
 src/backend/libpq/pqsignal.c        |  13 +++-
 src/backend/postmaster/postmaster.c |   2 +-
 src/backend/storage/ipc/latch.c     | 102 +++++++++++++++++++++-------
 src/backend/utils/init/miscinit.c   |   4 +-
 src/include/libpq/pqsignal.h        |   3 +-
 src/include/storage/latch.h         |   2 +
 6 files changed, 97 insertions(+), 29 deletions(-)

diff --git a/src/backend/libpq/pqsignal.c b/src/backend/libpq/pqsignal.c
index 9289493118..13ec3cd533 100644
--- a/src/backend/libpq/pqsignal.c
+++ b/src/backend/libpq/pqsignal.c
@@ -20,6 +20,7 @@
 
 /* Global variables */
 sigset_t	UnBlockSig,
+			UnBlockAllSig,
 			BlockSig,
 			StartupBlockSig;
 
@@ -35,13 +36,21 @@ sigset_t	UnBlockSig,
  * collection; it's essentially BlockSig minus SIGTERM, SIGQUIT, SIGALRM.
  *
  * UnBlockSig is the set of signals to block when we don't want to block
- * signals (is this ever nonzero??)
+ * signals.
  */
 void
-pqinitmask(void)
+pqinitmask(bool block_sigurg)
 {
+	sigemptyset(&UnBlockAllSig);
 	sigemptyset(&UnBlockSig);
 
+	/*
+	 * In WAIT_USE_EPOLL builds, we want to keep SIGURG blocked except while
+	 * waiting.
+	 */
+	if (block_sigurg)
+		sigaddset(&UnBlockSig, SIGURG);
+
 	/* First set all signals, then clear some. */
 	sigfillset(&BlockSig);
 	sigfillset(&StartupBlockSig);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b7799ed1d2..1a0323504a 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -641,7 +641,7 @@ PostmasterMain(int argc, char *argv[])
 	 * postmaster/syslogger.c, postmaster/bgworker.c and
 	 * postmaster/checkpointer.c.
 	 */
-	pqinitmask();
+	pqinitmask(LatchBlockedSIGURG());
 	PG_SETMASK(&BlockSig);
 
 	pqsignal_pm(SIGHUP, SIGHUP_handler);	/* reread config file and have
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index f4768b79c2..03670eca90 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -3,7 +3,7 @@
  * latch.c
  *	  Routines for inter-process latches
  *
- * The Unix implementation uses the so-called self-pipe trick to overcome the
+ * The poll() implementation uses the so-called self-pipe trick to overcome the
  * race condition involved with poll() (or epoll_wait() on linux) and setting
  * a global flag in the signal handler. When a latch is set and the current
  * process is waiting for it, the signal handler wakes up the poll() in
@@ -19,6 +19,11 @@
  * process, SIGUSR1 is sent and the signal handler in the waiting process
  * writes the byte to the pipe on behalf of the signaling process.
  *
+ * The epoll() implementation overcomes the race with a different technique:
+ * it uses SIGURG to wake processes, but only unblocks it while waiting.
+ * Since epoll_pwait() atomically unblocks and begins waiting, there is no
+ * race, so there is no need for the self-pipe trick.
+ *
  * The Windows implementation uses Windows events that are inherited by all
  * postmaster child processes. There's no need for the self-pipe trick there.
  *
@@ -46,6 +51,7 @@
 #include <poll.h>
 #endif
 
+#include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -79,6 +85,15 @@
 #error "no wait set implementation available"
 #endif
 
+#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
+#define NEED_SELF_PIPE_TRICK
+/* Use a signal that is overloaded with the procsignal mechanism. */
+#define LATCH_SIGNAL SIGUSR1
+#else
+/* Use a dedicated signal for latches if we have race-free waiting. */
+#define LATCH_SIGNAL SIGURG
+#endif
+
 /* typedef in latch.h */
 struct WaitEventSet
 {
@@ -140,6 +155,7 @@ static WaitEventSet *LatchWaitSet;
 /* Are we currently in WaitLatch? The signal handler would like to know. */
 static volatile sig_atomic_t waiting = false;
 
+#ifdef NEED_SELF_PIPE_TRICK
 /* Read and write ends of the self-pipe */
 static int	selfpipe_readfd = -1;
 static int	selfpipe_writefd = -1;
@@ -150,10 +166,12 @@ static int	selfpipe_owner_pid = 0;
 /* Private function prototypes */
 static void sendSelfPipeByte(void);
 static void drainSelfPipe(void);
+#endif
 #endif							/* WIN32 */
 
 #if defined(WAIT_USE_EPOLL)
 static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
+static void sigurg_handler(SIGNAL_ARGS);
 #elif defined(WAIT_USE_KQUEUE)
 static void WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events);
 #elif defined(WAIT_USE_POLL)
@@ -174,7 +192,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 void
 InitializeLatchSupport(void)
 {
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 	int			pipefd[2];
 
 	if (IsUnderPostmaster)
@@ -244,8 +262,10 @@ InitializeLatchSupport(void)
 	/* Tell fd.c about these two long-lived FDs */
 	ReserveExternalFD();
 	ReserveExternalFD();
-#else
-	/* currently, nothing to do here for Windows */
+#endif
+
+#ifdef WAIT_USE_EPOLL
+	pqsignal(SIGURG, sigurg_handler);
 #endif
 }
 
@@ -278,10 +298,10 @@ InitLatch(Latch *latch)
 	latch->owner_pid = MyProcPid;
 	latch->is_shared = false;
 
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 	/* Assert InitializeLatchSupport has been called in this process */
 	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
-#else
+#elif defined(WIN32)
 	latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
 	if (latch->event == NULL)
 		elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
@@ -337,8 +357,8 @@ InitSharedLatch(Latch *latch)
  * there is any risk of that, caller must provide an interlock to prevent it.
  *
  * In any process that calls OwnLatch(), make sure that
- * latch_sigusr1_handler() is called from the SIGUSR1 signal handler,
- * as shared latches use SIGUSR1 for inter-process communication.
+ * latch_sigusr1_handler() is called from the SIGUSR1 signal handler, as shared
+ * latches use SIGUSR1 for inter-process communication in some builds.
  */
 void
 OwnLatch(Latch *latch)
@@ -346,7 +366,7 @@ OwnLatch(Latch *latch)
 	/* Sanity checks */
 	Assert(latch->is_shared);
 
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 	/* Assert InitializeLatchSupport has been called in this process */
 	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
 #endif
@@ -558,11 +578,17 @@ SetLatch(Latch *latch)
 		return;
 	else if (owner_pid == MyProcPid)
 	{
+#ifdef NEED_SELF_PIPE_TRICK
 		if (waiting)
 			sendSelfPipeByte();
+#else
+		if (waiting)
+			raise(LATCH_SIGNAL);
+#endif
 	}
 	else
-		kill(owner_pid, SIGUSR1);
+		kill(owner_pid, LATCH_SIGNAL);
+
 #else
 
 	/*
@@ -858,8 +884,11 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
 	{
 		set->latch = latch;
 		set->latch_pos = event->pos;
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 		event->fd = selfpipe_readfd;
+#else
+		event->fd = PGINVALID_SOCKET;
+		return event->pos;
 #endif
 	}
 	else if (events == WL_POSTMASTER_DEATH)
@@ -936,10 +965,10 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
 		set->latch = latch;
 		/*
 		 * On Unix, we don't need to modify the kernel object because the
-		 * underlying pipe is the same for all latches so we can return
-		 * immediately.  On Windows, we need to update our array of handles,
-		 * but we leave the old one in place and tolerate spurious wakeups if
-		 * the latch is disabled.
+		 * underlying pipe (if there is one) is the same for all latches so we
+		 * can return immediately.  On Windows, we need to update our array of
+		 * handles, but we leave the old one in place and tolerate spurious
+		 * wakeups if the latch is disabled.
 		 */
 #if defined(WAIT_USE_WIN32)
 		if (!latch)
@@ -1362,7 +1391,7 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
 #if defined(WAIT_USE_EPOLL)
 
 /*
- * Wait using linux's epoll_wait(2).
+ * Wait using linux's epoll_pwait(2).
  *
  * This is the preferable wait method, as several readiness notifications are
  * delivered, without having to iterate through all of set->events. The return
@@ -1379,8 +1408,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 	struct epoll_event *cur_epoll_event;
 
 	/* Sleep */
-	rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
-					nevents, cur_timeout);
+	rc = epoll_pwait(set->epoll_fd, set->epoll_ret_events,
+					 nevents, cur_timeout, &UnBlockAllSig);
 
 	/* Check return code */
 	if (rc < 0)
@@ -1425,8 +1454,10 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 		{
 			Assert(set->latch);
 
+#ifdef NEED_SELF_PIPE_TRICK
 			/* There's data in the self-pipe, clear it. */
 			drainSelfPipe();
+#endif
 
 			if (set->latch && set->latch->is_set)
 			{
@@ -1952,7 +1983,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 #endif
 
 /*
- * SetLatch uses SIGUSR1 to wake up the process waiting on the latch.
+ * SetLatch uses SIGUSR1 to wake up the process waiting on the latch in some
+ * builds.
  *
  * Wake up WaitLatch, if we're waiting.  (We might not be, since SIGUSR1 is
  * overloaded for multiple purposes; or we might not have reached WaitLatch
@@ -1965,13 +1997,15 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 void
 latch_sigusr1_handler(void)
 {
+#ifdef NEED_SELF_PIPE_TRICK
 	if (waiting)
 		sendSelfPipeByte();
+#endif
 }
 #endif							/* !WIN32 */
 
 /* Send one byte to the self-pipe, to wake up WaitLatch */
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 static void
 sendSelfPipeByte(void)
 {
@@ -2001,7 +2035,7 @@ retry:
 		return;
 	}
 }
-#endif							/* !WIN32 */
+#endif							/* !NEED_SELF_PIPE_TRICK */
 
 /*
  * Read all available data from the self-pipe
@@ -2010,7 +2044,7 @@ retry:
  * return, it must reset that flag first (though ideally, this will never
  * happen).
  */
-#ifndef WIN32
+#ifdef NEED_SELF_PIPE_TRICK
 static void
 drainSelfPipe(void)
 {
@@ -2049,4 +2083,26 @@ drainSelfPipe(void)
 		/* else buffer wasn't big enough, so read again */
 	}
 }
-#endif							/* !WIN32 */
+#endif							/* !NEED_SELF_PIPE_TRICK */
+
+#ifdef WAIT_USE_EPOLL
+/*
+ * SIGURG handler.
+ *
+ * No action; this signal is intended only to wake up epoll_pwait().
+ */
+static void
+sigurg_handler(SIGNAL_ARGS)
+{
+}
+#endif
+
+bool
+LatchBlockedSIGURG(void)
+{
+#ifdef WAIT_USE_EPOLL
+	return true;
+#else
+	return false;
+#endif
+}
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index ed2ab4b5b2..fa29e54fee 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -137,7 +137,7 @@ InitPostmasterChild(void)
 
 	/* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
 #ifdef EXEC_BACKEND
-	pqinitmask();
+	pqinitmask(LatchBlockedSIGURG());
 #endif
 
 	/*
@@ -178,7 +178,7 @@ InitStandaloneProcess(const char *argv0)
 	 * For consistency with InitPostmasterChild, initialize signal mask here.
 	 * But we don't unblock SIGQUIT or provide a default handler for it.
 	 */
-	pqinitmask();
+	pqinitmask(LatchBlockedSIGURG());
 	PG_SETMASK(&BlockSig);
 
 	/* Compute paths, no postmaster to inherit from */
diff --git a/src/include/libpq/pqsignal.h b/src/include/libpq/pqsignal.h
index 695c4dc3f8..021f7fd6be 100644
--- a/src/include/libpq/pqsignal.h
+++ b/src/include/libpq/pqsignal.h
@@ -31,10 +31,11 @@ extern int	pqsigsetmask(int mask);
 #endif							/* WIN32 */
 
 extern sigset_t UnBlockSig,
+			UnBlockAllSig,
 			BlockSig,
 			StartupBlockSig;
 
-extern void pqinitmask(void);
+extern void pqinitmask(bool should_block_sigurg);
 
 /* pqsigfunc is declared in src/include/port.h */
 extern pqsigfunc pqsignal_pm(int signo, pqsigfunc func);
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index d0da7e5d31..01b98c326b 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -179,6 +179,8 @@ extern int	WaitLatchOrSocket(Latch *latch, int wakeEvents,
 							  pgsocket sock, long timeout, uint32 wait_event_info);
 extern void InitializeLatchWaitSet(void);
 
+extern bool LatchBlockedSIGURG(void);
+
 /*
  * Unix implementation uses SIGUSR1 for inter-process signaling.
  * Win32 doesn't need this.
-- 
2.20.1

From 868498397465505401168dd92d10d612824771d5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 8 Aug 2020 07:52:47 +1200
Subject: [PATCH v2 3/4] Remove self-pipe in WAIT_USE_KQUEUE builds.

Instead of using a signal handler for latch wakeup, use EVFILT_SIGNAL.
This closes the race condition traditionally solved by the self-pipe
trick.

Discussion: https://postgr.es/m/ca+hukgjjxpdpzbe0a3hyuywbvazuc89yx3jk9rfzgfv_khu...@mail.gmail.com
---
 src/backend/storage/ipc/latch.c | 41 +++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 03670eca90..a01b101ac6 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -24,6 +24,9 @@
  * Since epoll_pwait() atomically unblocks and begins waiting, there is no
  * race, so there is no need for the self-pipe trick.
  *
+ * The kqueue() implementation uses yet another technique: it uses SIGURG,
+ * but installs no signal handler, and waits for signals using EVFILT_SIGNAL.
+ *
  * The Windows implementation uses Windows events that are inherited by all
  * postmaster child processes. There's no need for the self-pipe trick there.
  *
@@ -85,7 +88,7 @@
 #error "no wait set implementation available"
 #endif
 
-#if defined(WAIT_USE_POLL) || defined(WAIT_USE_KQUEUE)
+#if defined(WAIT_USE_POLL)
 #define NEED_SELF_PIPE_TRICK
 /* Use a signal that is overloaded with the procsignal mechanism. */
 #define LATCH_SIGNAL SIGUSR1
@@ -267,6 +270,9 @@ InitializeLatchSupport(void)
 #ifdef WAIT_USE_EPOLL
 	pqsignal(SIGURG, sigurg_handler);
 #endif
+#ifdef WAIT_USE_KQUEUE
+	pqsignal(SIGURG, SIG_IGN);
+#endif
 }
 
 void
@@ -888,7 +894,9 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
 		event->fd = selfpipe_readfd;
 #else
 		event->fd = PGINVALID_SOCKET;
+#ifdef WAIT_USE_EPOLL
 		return event->pos;
+#endif
 #endif
 	}
 	else if (events == WL_POSTMASTER_DEATH)
@@ -1108,6 +1116,18 @@ WaitEventAdjustKqueueAddPostmaster(struct kevent *k_ev, WaitEvent *event)
 	AccessWaitEvent(k_ev) = event;
 }
 
+static inline void
+WaitEventAdjustKqueueAddLatch(struct kevent *k_ev, WaitEvent *event)
+{
+	/* For now latch can only be added, not removed. */
+	k_ev->ident = SIGURG;
+	k_ev->filter = EVFILT_SIGNAL;
+	k_ev->flags = EV_ADD;
+	k_ev->fflags = 0;
+	k_ev->data = 0;
+	AccessWaitEvent(k_ev) = event;
+}
+
 /*
  * old_events is the previous event mask, used to compute what has changed.
  */
@@ -1139,6 +1159,14 @@ WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
 		 */
 		WaitEventAdjustKqueueAddPostmaster(&k_ev[count++], event);
 	}
+	else if (event->events == WL_LATCH_SET)
+	{
+		/*
+		 * Unlike all the other implementations, we detect latch wakeup using
+		 * signal events, rather than installing a signal handler.
+		 */
+		 WaitEventAdjustKqueueAddLatch(&k_ev[count++], event);
+	}
 	else
 	{
 		/*
@@ -1146,11 +1174,9 @@ WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
 		 * old event mask to the new event mask, since kevent treats readable
 		 * and writable as separate events.
 		 */
-		if (old_events == WL_LATCH_SET ||
-			(old_events & WL_SOCKET_READABLE))
+		if (old_events & WL_SOCKET_READABLE)
 			old_filt_read = true;
-		if (event->events == WL_LATCH_SET ||
-			(event->events & WL_SOCKET_READABLE))
+		if (event->events & WL_SOCKET_READABLE)
 			new_filt_read = true;
 		if (old_events & WL_SOCKET_WRITEABLE)
 			old_filt_write = true;
@@ -1607,11 +1633,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 		occurred_events->events = 0;
 
 		if (cur_event->events == WL_LATCH_SET &&
-			cur_kqueue_event->filter == EVFILT_READ)
+			cur_kqueue_event->filter == EVFILT_SIGNAL)
 		{
-			/* There's data in the self-pipe, clear it. */
-			drainSelfPipe();
-
 			if (set->latch && set->latch->is_set)
 			{
 				occurred_events->fd = PGINVALID_SOCKET;
-- 
2.20.1

Reply via email to