Attached is revision of this patch that now treats the latch in PGPROC, waitLatch, as the generic "process latch", rather than just using it for sync rep; It is initialised appropriately as a shared latch generically, within InitProcGlobal(), and ownership is subsequently set within InitProcess(). We were doing so before, though only for the benefit of sync rep.
The idea here is to have a per-process latch to guard against time-out invalidation issues from within each generic signal handler, by calling SetLatch() on our generic latch there, much as we already do from within non-generic archiver process signal handlers on the archiver's static, non-generic latch (the archiver has its MyProc pointer set to NULL, and we allow for the possibility that generic handlers may be registered within processes that have a NULL MyProc - though latch timeout invalidation issues then become the responsibility of the process exclusively, just as is currently the case with the archiver. In other words, they better not register a generic signal handler). It doesn't really matter that the SetLatch() call will usually be unnecessary, because, as Heikki once pointed out, redundantly setting a latch that is already set is very cheap. We don't check if it's set directly in advance of setting the latch, because the Latch struct is "logically opaque" and there is no "public function" to check if it's set, nor should there be; the checking simply happens in SetLatch(). On 18 July 2011 20:06, Heikki Linnakangas <heikki.linnakan...@enterprisedb.com> wrote: > Right, we can easily change the timeout argument to be in milliseconds > instead of microseconds. I've done so in this latest revision as a precautionary measure. I don't see much point in sub-millisecond granularity, and besides, the Windows implementation will not provide that granularity anyway as things stand. Thoughts? -- Peter Geoghegan http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training and Services
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6a6959f..b2cf973 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10210,7 +10210,7 @@ retry: /* * Wait for more WAL to arrive, or timeout to be reached */ - WaitLatch(&XLogCtl->recoveryWakeupLatch, WL_LATCH_SET | WL_TIMEOUT, 5000000L); + WaitLatch(&XLogCtl->recoveryWakeupLatch, WL_LATCH_SET | WL_TIMEOUT, 5000L); ResetLatch(&XLogCtl->recoveryWakeupLatch); } else diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c index 9940a42..0663eb8 100644 --- a/src/backend/port/unix_latch.c +++ b/src/backend/port/unix_latch.c @@ -182,7 +182,7 @@ DisownLatch(volatile Latch *latch) * to wait for. If the latch is already set (and WL_LATCH_SET is given), the * function returns immediately. * - * The 'timeout' is given in microseconds. It must be >= 0 if WL_TIMEOUT + * The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT * event is given, otherwise it is ignored. On some platforms, signals cause * the timeout to be restarted, so beware that the function can sleep for * several times longer than the specified timeout. @@ -236,8 +236,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, if (wakeEvents & WL_TIMEOUT) { Assert(timeout >= 0); - tv.tv_sec = timeout / 1000000L; - tv.tv_usec = timeout % 1000000L; + tv.tv_sec = timeout / 1000L; + /* tv_usec should be in microseconds */ + tv.tv_usec = (timeout % 1000L) * 1000L; tvp = &tv; } @@ -329,6 +330,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, /* * Sets a latch and wakes up anyone waiting on it. Returns quickly if the * latch is already set. + * + * Note that there is a general requirement to call this function within + * signal handlers, to ensure that latch timeout is not invalidated. For + * generic signal handlers that may be registered for multiple processes, + * it will be called with the per-process latch in PGPROC as its argument. */ void SetLatch(volatile Latch *latch) diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c index 8d59ad4..5ee4f40 100644 --- a/src/backend/port/win32_latch.c +++ b/src/backend/port/win32_latch.c @@ -99,23 +99,14 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, SOCKET sock, int numevents; int result = 0; int pmdeath_eventno = 0; - long timeout_ms; Assert(wakeEvents != 0); + Assert(timeout >= 0 || (wakeEvents & WL_TIMEOUT) == 0); /* Ignore WL_SOCKET_* events if no valid socket is given */ if (sock == PGINVALID_SOCKET) wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE); - /* Convert timeout to milliseconds for WaitForMultipleObjects() */ - if (wakeEvents & WL_TIMEOUT) - { - Assert(timeout >= 0); - timeout_ms = timeout / 1000; - } - else - timeout_ms = INFINITE; - /* Construct an array of event handles for WaitforMultipleObjects() */ latchevent = latch->event; @@ -162,7 +153,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, SOCKET sock, break; } - rc = WaitForMultipleObjects(numevents, events, FALSE, timeout_ms); + rc = WaitForMultipleObjects(numevents, events, FALSE, (wakeEvents & WL_TIMEOUT)?timeout:INFINITE); if (rc == WAIT_FAILED) elog(ERROR, "WaitForMultipleObjects() failed: error code %d", (int) GetLastError()); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 2f3fcbf..acfc3b4 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -84,6 +84,7 @@ #include "postmaster/postmaster.h" #include "storage/bufmgr.h" #include "storage/ipc.h" +#include "storage/latch.h" #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procsignal.h" @@ -567,38 +568,20 @@ AutoVacLauncherMain(int argc, char *argv[]) /* * Sleep for a while according to schedule. + * Wait on Latch. * - * On some platforms, signals won't interrupt the sleep. To ensure we - * respond reasonably promptly when someone signals us, break down the - * sleep into 1-second increments, and check for interrupts after each - * nap. + * We handle wait time invalidation by calling + * SetLatch() in signal handlers. */ - while (nap.tv_sec > 0 || nap.tv_usec > 0) - { - uint32 sleeptime; - - if (nap.tv_sec > 0) - { - sleeptime = 1000000; - nap.tv_sec--; - } - else - { - sleeptime = nap.tv_usec; - nap.tv_usec = 0; - } - pg_usleep(sleeptime); - - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. - */ - if (!PostmasterIsAlive()) - proc_exit(1); - - if (got_SIGTERM || got_SIGHUP || got_SIGUSR2) - break; - } + WaitLatch(&MyProc->waitLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L)); + /* + * Emergency bailout if postmaster has died. This is to avoid the + * necessity for manual cleanup of all postmaster children. + * + * This happens again here because we may have slept for quite a while. + */ + if (!PostmasterIsAlive()) + proc_exit(1); DisableCatchupInterrupt(); @@ -1322,6 +1305,8 @@ static void avl_sighup_handler(SIGNAL_ARGS) { got_SIGHUP = true; + /* let the waiting loop iterate */ + SetLatch(&MyProc->waitLatch); } /* SIGUSR2: a worker is up and running, or just finished, or failed to fork */ @@ -1329,6 +1314,8 @@ static void avl_sigusr2_handler(SIGNAL_ARGS) { got_SIGUSR2 = true; + /* let the waiting loop iterate */ + SetLatch(&MyProc->waitLatch); } /* SIGTERM: time to die */ @@ -1336,6 +1323,8 @@ static void avl_sigterm_handler(SIGNAL_ARGS) { got_SIGTERM = true; + /* let the waiting loop iterate */ + SetLatch(&MyProc->waitLatch); } diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 2070fbb..8d1dab7 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -409,7 +409,7 @@ pgarch_MainLoop(void) int rc; rc = WaitLatch(&mainloop_latch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - timeout * 1000000L); + timeout * 1000L); if (rc & WL_TIMEOUT) wakened = true; } diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 32db2bc..4672a52 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -171,7 +171,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN) * postmaster death regularly while waiting. Note that timeout here * does not necessarily release from loop. */ - WaitLatch(&MyProc->waitLatch, WL_LATCH_SET | WL_TIMEOUT, 60000000L); + WaitLatch(&MyProc->waitLatch, WL_LATCH_SET | WL_TIMEOUT, 60000L); /* Must reset the latch before testing state. */ ResetLatch(&MyProc->waitLatch); diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 63a6304..c1996ca 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -812,7 +812,7 @@ WalSndLoop(void) if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents, - MyProcPort->sock, sleeptime * 1000L); + MyProcPort->sock, sleeptime); /* Check for replication timeout */ if (replication_timeout > 0 && diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 28bcaa7..892941c 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -22,6 +22,7 @@ #include "miscadmin.h" #include "storage/ipc.h" #include "storage/latch.h" +#include "storage/proc.h" #include "storage/procsignal.h" #include "storage/shmem.h" #include "storage/sinval.h" @@ -255,6 +256,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) { int save_errno = errno; + if (MyProc) + SetLatch(&MyProc->waitLatch); + if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT)) HandleCatchupInterrupt(); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index f9b3028..8360bd5 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1613,6 +1613,8 @@ handle_sig_alarm(SIGNAL_ARGS) (void) CheckStatementTimeout(); errno = save_errno; + if (MyProc) + SetLatch(&MyProc->waitLatch); } /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f035a48..ec91f18 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -2643,12 +2643,12 @@ die(SIGNAL_ARGS) InterruptHoldoffCount--; ProcessInterrupts(); } - - /* Interrupt any sync rep wait which is currently in progress. */ - SetLatch(&(MyProc->waitLatch)); } errno = save_errno; + /* set the process latch - we cannot risk invalidating its timeout */ + if (MyProc) + SetLatch(&MyProc->waitLatch); } /* @@ -2684,18 +2684,22 @@ StatementCancelHandler(SIGNAL_ARGS) InterruptHoldoffCount--; ProcessInterrupts(); } - - /* Interrupt any sync rep wait which is currently in progress. */ - SetLatch(&(MyProc->waitLatch)); } errno = save_errno; + /* set the process latch - we cannot risk invalidating its timeout */ + if (MyProc) + SetLatch(&MyProc->waitLatch); } /* signal handler for floating point exception */ void FloatExceptionHandler(SIGNAL_ARGS) { + /* set the process latch - we cannot risk invalidating its timeout */ + if (MyProc) + SetLatch(&MyProc->waitLatch); + ereport(ERROR, (errcode(ERRCODE_FLOATING_POINT_EXCEPTION), errmsg("floating-point exception"), @@ -2709,6 +2713,9 @@ static void SigHupHandler(SIGNAL_ARGS) { got_SIGHUP = true; + /* set the process latch - we cannot risk invalidating its timeout */ + if (MyProc) + SetLatch(&MyProc->waitLatch); } /* diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 09ac3cf..71e41d3 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -126,13 +126,14 @@ struct PGPROC LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ + Latch waitLatch; /* generic latch for process */ + /* * Info to allow us to wait for synchronous replication, if needed. * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. * syncRepState must not be touched except by owning process or WALSender. * syncRepLinks used only while holding SyncRepLock. */ - Latch waitLatch; /* allow us to wait for sync rep */ XLogRecPtr waitLSN; /* waiting for this LSN or higher */ int syncRepState; /* wait state for sync rep */ SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers