Alvaro Herrera <[email protected]> writes:
> Tom Lane wrote:
>> It also appears to me that do_start_bgworker's treatment of fork
>> failure is completely brain dead. Did anyone really think about
>> that case?
> Hmm, I probably modelled it on autovacuum without giving that case much
> additional consideration.
Attached is a proposed patch that should make fork failure behave
sanely, ie it works much the same as a worker crash immediately after
launch. I also refactored things a bit to make do_start_bgworker
fully responsible for updating the RegisteredBgWorker's state,
rather than doing just some of it as before.
I tested this by hot-wiring the fork_process call to fail some of
the time, which showed that the postmaster now seems to recover OK,
but parallel.c's logic is completely innocent of the idea that
worker-startup failure is possible. The leader backend just freezes,
and nothing short of kill -9 on that backend will get you out of it.
Fixing that seems like material for a separate patch though.
regards, tom lane
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index c382345..8461c24 100644
*** a/src/backend/postmaster/postmaster.c
--- b/src/backend/postmaster/postmaster.c
*************** static void TerminateChildren(int signal
*** 420,425 ****
--- 420,426 ----
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
static int CountChildren(int target);
+ static bool assign_backendlist_entry(RegisteredBgWorker *rw);
static void maybe_start_bgworker(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
static pid_t StartChildProcess(AuxProcType type);
*************** bgworker_forkexec(int shmem_slot)
*** 5531,5543 ****
* Start a new bgworker.
* Starting time conditions must have been checked already.
*
* This code is heavily based on autovacuum.c, q.v.
*/
! static void
do_start_bgworker(RegisteredBgWorker *rw)
{
pid_t worker_pid;
ereport(DEBUG1,
(errmsg("starting background worker process \"%s\"",
rw->rw_worker.bgw_name)));
--- 5532,5564 ----
* Start a new bgworker.
* Starting time conditions must have been checked already.
*
+ * Returns true on success, false on failure.
+ * In either case, update the RegisteredBgWorker's state appropriately.
+ *
* This code is heavily based on autovacuum.c, q.v.
*/
! static bool
do_start_bgworker(RegisteredBgWorker *rw)
{
pid_t worker_pid;
+ Assert(rw->rw_pid == 0);
+
+ /*
+ * Allocate and assign the Backend element. Note we must do this before
+ * forking, so that we can handle out of memory properly.
+ *
+ * Treat failure as though the worker had crashed. That way, the
+ * postmaster will wait a bit before attempting to start it again; if it
+ * tried again right away, most likely it'd find itself repeating the
+ * out-of-memory or fork failure condition.
+ */
+ if (!assign_backendlist_entry(rw))
+ {
+ rw->rw_crashed_at = GetCurrentTimestamp();
+ return false;
+ }
+
ereport(DEBUG1,
(errmsg("starting background worker process \"%s\"",
rw->rw_worker.bgw_name)));
*************** do_start_bgworker(RegisteredBgWorker *rw
*** 5549,5557 ****
#endif
{
case -1:
ereport(LOG,
(errmsg("could not fork worker process: %m")));
! return;
#ifndef EXEC_BACKEND
case 0:
--- 5570,5586 ----
#endif
{
case -1:
+ /* in postmaster, fork failed ... */
ereport(LOG,
(errmsg("could not fork worker process: %m")));
! /* undo what assign_backendlist_entry did */
! ReleasePostmasterChildSlot(rw->rw_child_slot);
! rw->rw_child_slot = 0;
! free(rw->rw_backend);
! rw->rw_backend = NULL;
! /* mark entry as crashed, so we'll try again later */
! rw->rw_crashed_at = GetCurrentTimestamp();
! break;
#ifndef EXEC_BACKEND
case 0:
*************** do_start_bgworker(RegisteredBgWorker *rw
*** 5575,5588 ****
PostmasterContext = NULL;
StartBackgroundWorker();
break;
#endif
default:
rw->rw_pid = worker_pid;
rw->rw_backend->pid = rw->rw_pid;
ReportBackgroundWorkerPID(rw);
! break;
}
}
/*
--- 5604,5627 ----
PostmasterContext = NULL;
StartBackgroundWorker();
+
+ exit(1); /* should not get here */
break;
#endif
default:
+ /* in postmaster, fork successful ... */
rw->rw_pid = worker_pid;
rw->rw_backend->pid = rw->rw_pid;
ReportBackgroundWorkerPID(rw);
! /* add new worker to lists of backends */
! dlist_push_head(&BackendList, &rw->rw_backend->elem);
! #ifdef EXEC_BACKEND
! ShmemBackendArrayAdd(rw->rw_backend);
! #endif
! return true;
}
+
+ return false;
}
/*
*************** bgworker_should_start_now(BgWorkerStartT
*** 5629,5634 ****
--- 5668,5675 ----
* Allocate the Backend struct for a connected background worker, but don't
* add it to the list of backends just yet.
*
+ * On failure, return false without changing any worker state.
+ *
* Some info from the Backend is copied into the passed rw.
*/
static bool
*************** assign_backendlist_entry(RegisteredBgWor
*** 5647,5654 ****
ereport(LOG,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("could not generate random cancel key")));
-
- rw->rw_crashed_at = GetCurrentTimestamp();
return false;
}
--- 5688,5693 ----
*************** assign_backendlist_entry(RegisteredBgWor
*** 5658,5671 ****
ereport(LOG,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
-
- /*
- * The worker didn't really crash, but setting this nonzero makes
- * postmaster wait a bit before attempting to start it again; if it
- * tried again right away, most likely it'd find itself under the same
- * memory pressure.
- */
- rw->rw_crashed_at = GetCurrentTimestamp();
return false;
}
--- 5697,5702 ----
*************** assign_backendlist_entry(RegisteredBgWor
*** 5687,5692 ****
--- 5718,5728 ----
* As a side effect, the bgworker control variables are set or reset whenever
* there are more workers to start after this one, and whenever the overall
* system state requires it.
+ *
+ * The reason we start at most one worker per call is to avoid consuming the
+ * postmaster's attention for too long when many such requests are pending.
+ * As long as StartWorkerNeeded is true, ServerLoop will not block and will
+ * call this function again after dealing with any other issues.
*/
static void
maybe_start_bgworker(void)
*************** maybe_start_bgworker(void)
*** 5694,5706 ****
slist_mutable_iter iter;
TimestampTz now = 0;
if (FatalError)
{
StartWorkerNeeded = false;
HaveCrashedWorker = false;
! return; /* not yet */
}
HaveCrashedWorker = false;
slist_foreach_modify(iter, &BackgroundWorkerList)
--- 5730,5748 ----
slist_mutable_iter iter;
TimestampTz now = 0;
+ /*
+ * During crash recovery, we have no need to be called until the state
+ * transition out of recovery.
+ */
if (FatalError)
{
StartWorkerNeeded = false;
HaveCrashedWorker = false;
! return;
}
+ /* Don't need to be called again unless we find a reason for it below */
+ StartWorkerNeeded = false;
HaveCrashedWorker = false;
slist_foreach_modify(iter, &BackgroundWorkerList)
*************** maybe_start_bgworker(void)
*** 5709,5719 ****
rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
! /* already running? */
if (rw->rw_pid != 0)
continue;
! /* marked for death? */
if (rw->rw_terminate)
{
ForgetBackgroundWorker(&iter);
--- 5751,5761 ----
rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
! /* ignore if already running */
if (rw->rw_pid != 0)
continue;
! /* if marked for death, clean up and remove from list */
if (rw->rw_terminate)
{
ForgetBackgroundWorker(&iter);
*************** maybe_start_bgworker(void)
*** 5735,5746 ****
--- 5777,5790 ----
continue;
}
+ /* read system time only when needed */
if (now == 0)
now = GetCurrentTimestamp();
if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
rw->rw_worker.bgw_restart_time * 1000))
{
+ /* Set flag to remember that we have workers to start later */
HaveCrashedWorker = true;
continue;
}
*************** maybe_start_bgworker(void)
*** 5748,5782 ****
if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
{
! /* reset crash time before calling assign_backendlist_entry */
rw->rw_crashed_at = 0;
/*
! * Allocate and assign the Backend element. Note we must do this
! * before forking, so that we can handle out of memory properly.
*/
! if (!assign_backendlist_entry(rw))
return;
!
! do_start_bgworker(rw); /* sets rw->rw_pid */
!
! dlist_push_head(&BackendList, &rw->rw_backend->elem);
! #ifdef EXEC_BACKEND
! ShmemBackendArrayAdd(rw->rw_backend);
! #endif
/*
! * Have ServerLoop call us again. Note that there might not
! * actually *be* another runnable worker, but we don't care all
! * that much; we will find out the next time we run.
*/
StartWorkerNeeded = true;
return;
}
}
-
- /* no runnable worker found */
- StartWorkerNeeded = false;
}
/*
--- 5792,5826 ----
if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
{
! /* reset crash time before trying to start worker */
rw->rw_crashed_at = 0;
/*
! * Try to start the worker.
! *
! * On failure, give up processing workers for now, but set
! * StartWorkerNeeded so we'll come back here on the next iteration
! * of ServerLoop to try again. (We don't want to wait, because
! * there might be additional ready-to-run workers.) We could set
! * HaveCrashedWorker as well, since this worker is now marked
! * crashed, but there's no need because the next run of this
! * function will do that.
*/
! if (!do_start_bgworker(rw))
! {
! StartWorkerNeeded = true;
return;
! }
/*
! * Quit, but have ServerLoop call us again to look for additional
! * ready-to-run workers. There might not be any, but we'll find
! * out the next time we run.
*/
StartWorkerNeeded = true;
return;
}
}
}
/*
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers