Hi, Currently pg_terminate_backend(), sends SIGTERM to the backend process but doesn't ensure it's exit. There are chances that backends still are running(even after pg_terminate_backend() is called) until the interrupts are processed(using ProcessInterrupts()). This could cause problems especially in testing, for instance in a sql file right after pg_terminate_backend(), if any test case depends on the backend's non-existence[1], but the backend is not terminated. As discussed in [1], we have wait_pid()(see regress.c and sql/dblink.sql), but it's not usable across the system. In [1], we thought it would be better to have functions ensuring the backend's exit on the similar lines of pg_terminate_backend().
I propose to have two functions: 1. pg_terminate_backend_and_wait() -- which sends SIGTERM to the backend and wait's until it's exit. 2. pg_wait_backend() -- which waits for a given backend process. Note that this function has to be used carefully after pg_terminate_backend(), if used on a backend that's not ternmited it simply keeps waiting in a loop. Attaching a WIP patch herewith. Thoughts? [1] https://www.postgresql.org/message-id/flat/f31cc4da-a7ea-677f-cf64-a2f9db854bf5%40oss.nttdata.com With Regards, Bharath Rupireddy. EnterpriseDB: http://www.enterprisedb.com
From b1714f9fd95007b9fb6c26b675f83fa73fecd562 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddy@enterprisedb.com> Date: Wed, 21 Oct 2020 11:52:34 +0530 Subject: [PATCH v1] pg_terminate_backend_and_wait --- src/backend/storage/ipc/signalfuncs.c | 71 +++++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 7 +++ 2 files changed, 78 insertions(+) diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index d822e82cb9..002ac5f10d 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -149,6 +149,77 @@ pg_terminate_backend(PG_FUNCTION_ARGS) PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); } +/* + * Signal to terminate a backend process and wait until no process has a given + * PID. This is allowed if you are a member of the role whose process is being + * terminated. + * + * Note that only superusers can signal superuser-owned processes. + */ +Datum +pg_terminate_backend_and_wait(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + bool r = DatumGetBool(DirectFunctionCall1(pg_terminate_backend, + PG_GETARG_DATUM(0))); + if (r) + { + while (kill(pid, 0) == 0) + { + /* Process interrupts in case of self termination. */ + if (pid == MyProcPid) + CHECK_FOR_INTERRUPTS(); + + pg_usleep(50000); + } + + if (errno != ESRCH) + elog(ERROR, "could not check PID %d liveness: %m", pid); + } + + PG_RETURN_BOOL(r); +} + +Datum +pg_wait_backend(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + PGPROC *proc = BackendPidGetProc(pid); + + /* + * BackendPidGetProc returns NULL if the pid isn't valid; but by the time + * we reach kill(), a process for which we get a valid proc here might + * have terminated on its own. There's no way to acquire a lock on an + * arbitrary process to prevent that. But since so far all the callers of + * this mechanism involve some request for ending the process anyway, that + * it might end on its own first is not a problem. + */ + if (proc == NULL) + { + /* + * This is just a warning so a loop-through-resultset will not abort + * if one backend terminated on its own during the run. + */ + ereport(WARNING, + (errmsg("PID %d is not a PostgreSQL server process", pid))); + PG_RETURN_BOOL(false); + } + + if (!superuser()) + elog(ERROR, "must be superuser to check PID liveness"); + + while (kill(pid, 0) == 0) + { + //CHECK_FOR_INTERRUPTS(); + pg_usleep(50000); + } + + if (errno != ESRCH) + elog(ERROR, "could not check PID %d liveness: %m", pid); + + PG_RETURN_BOOL(true); +} + /* * Signal to reload the database configuration * diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 22340baf1c..1c502d51e3 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6085,6 +6085,13 @@ { oid => '2096', descr => 'terminate a server process', proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool', proargtypes => 'int4', prosrc => 'pg_terminate_backend' }, +{ oid => '16386', descr => 'terminate a server process and wait for it\'s exit', + proname => 'pg_terminate_backend_and_wait', provolatile => 'v', + prorettype => 'bool', proargtypes => 'int4', + prosrc => 'pg_terminate_backend_and_wait' }, +{ oid => '16387', descr => 'waits for a backend\'s exit', + proname => 'pg_wait_backend', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4', prosrc => 'pg_wait_backend' }, { oid => '2172', descr => 'prepare for taking an online backup', proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r', prorettype => 'pg_lsn', proargtypes => 'text bool bool', -- 2.25.1