On Mon, Mar 15, 2021 at 10:38 AM Fujii Masao <masao.fu...@oss.nttdata.com> wrote: > On 2021/03/15 12:27, Bharath Rupireddy wrote: > > On Sun, Mar 7, 2021 at 2:39 PM Bharath Rupireddy > > <bharath.rupireddyforpostg...@gmail.com> wrote: > >> Attaching v7 patch for further review. > > > > Attaching v8 patch after rebasing on to the latest master. > > Thanks for rebasing the patch!
Thanks for reviewing. > - WAIT_EVENT_XACT_GROUP_UPDATE > + WAIT_EVENT_XACT_GROUP_UPDATE, > + WAIT_EVENT_BACKEND_TERMINATION > > These should be listed in alphabetical order. Done. > In pg_wait_until_termination's do-while loop, ResetLatch() should be called. > Otherwise, it would enter busy-loop after any signal arrives. Because the > latch is kept set and WaitLatch() always exits immediately in that case. Done. > + /* > + * Wait in steps of waittime milliseconds until this function exits or > + * timeout. > + */ > + int64 waittime = 10; > > 10 ms per cycle seems too frequent? Increased it to 100msec. > + ereport(WARNING, > + (errmsg("timeout cannot be negative > or zero: %lld", > + (long long int) > timeout))); > + > + result = false; > > IMO the parameter should be verified before doing the actual thing. Done. > Why is WARNING thrown in this case? Isn't it better to throw ERROR like > pg_promote() does? Done. Attaching v9 patch for further review. With Regards, Bharath Rupireddy. EnterpriseDB: http://www.enterprisedb.com
From ddcf3f3f4a3dbabdf558b828c5728dfbbdc13a31 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddy@enterprisedb.com> Date: Tue, 16 Mar 2021 14:46:41 +0530 Subject: [PATCH v9] pg_terminate_backend with wait and timeout This patch adds extra default arguments wait and timeout to pg_terminate_backend which will be used to terminate and wait timeout milliseconds for the backend's termination. This patch also adds a new function pg_wait_for_backend_termination(pid, timeout) which checks for the existence of the backend with given PID and waits timeout milliseconds for its termination. --- doc/src/sgml/func.sgml | 31 +++++- doc/src/sgml/monitoring.sgml | 4 + src/backend/catalog/system_views.sql | 10 ++ src/backend/postmaster/pgstat.c | 3 + src/backend/storage/ipc/signalfuncs.c | 143 +++++++++++++++++++++++++- src/include/catalog/pg_proc.dat | 9 +- src/include/pgstat.h | 3 +- 7 files changed, 194 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9492a3c6b9..0a417a0b2b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -24821,7 +24821,7 @@ SELECT collation for ('foo' COLLATE "de_DE"); <indexterm> <primary>pg_terminate_backend</primary> </indexterm> - <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type> ) + <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>wait</parameter> <type>boolean</type> <literal>DEFAULT</literal> <literal>false</literal>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> ) <returnvalue>boolean</returnvalue> </para> <para> @@ -24829,7 +24829,34 @@ SELECT collation for ('foo' COLLATE "de_DE"); specified process ID. This is also allowed if the calling role is a member of the role whose backend is being terminated or the calling role has been granted <literal>pg_signal_backend</literal>, - however only superusers can terminate superuser backends. + however only superusers can terminate superuser backends. When no + <parameter>wait</parameter> and <parameter>timeout</parameter> are + specified, this function sends SIGTERM to the backend with the given + process ID without waiting for its actual termination and returns <literal>true</literal>. + Sometimes the backend process may still exist. With <parameter>wait</parameter> + specified as <literal>true</literal>, the function ensures that the + backend process is actually terminated. It keeps checking for the + existence of the backend process either until the given <parameter>timeout</parameter> + or default 100 milliseconds and returns <literal>true</literal>. On + timeout a warning is emitted and <literal>false</literal> is returned. + </para></entry> + </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_wait_for_backend_termination</primary> + </indexterm> + <function>pg_wait_for_backend_termination</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> ) + <returnvalue>boolean</returnvalue> + </para> + <para> + Checks the existence of backend process with specified process ID. It + waits until the given <parameter>timeout</parameter> or the default 100 + milliseconds. <literal>true</literal> is returned when the backend is + found to be not existing within the <parameter>timeout</parameter> milliseconds. + On timeout, a warning is emitted and <literal>false</literal> is + returned. </para></entry> </row> </tbody> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index db4b4e460c..f362184d5e 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1569,6 +1569,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser </thead> <tbody> + <row> + <entry><literal>BackendTermination</literal></entry> + <entry>Waiting for the termination of a backend.</entry> + </row> <row> <entry><literal>BackupWaitWalArchive</literal></entry> <entry>Waiting for WAL files required for a backup to be successfully diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 0dca65dc7b..85521546e3 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1277,6 +1277,16 @@ CREATE OR REPLACE FUNCTION RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote' PARALLEL SAFE; +CREATE OR REPLACE FUNCTION + pg_terminate_backend(pid integer, wait boolean DEFAULT false, timeout int8 DEFAULT 100) + RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend' + PARALLEL SAFE; + +CREATE OR REPLACE FUNCTION + pg_wait_for_backend_termination(pid integer, timeout int8 DEFAULT 100) + RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_wait_for_backend_termination' + PARALLEL SAFE; + -- legacy definition for compatibility with 9.3 CREATE OR REPLACE FUNCTION json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index b1e2d94951..2f72241d2f 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -4130,6 +4130,9 @@ pgstat_get_wait_ipc(WaitEventIPC w) case WAIT_EVENT_XACT_GROUP_UPDATE: event_name = "XactGroupUpdate"; break; + case WAIT_EVENT_BACKEND_TERMINATION: + event_name = "BackendTermination"; + break; /* no default case, so that compiler will warn */ } diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 69fe23a256..8d879af47c 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -18,6 +18,7 @@ #include "catalog/pg_authid.h" #include "miscadmin.h" +#include "pgstat.h" #include "postmaster/syslogger.h" #include "storage/pmsignal.h" #include "storage/proc.h" @@ -126,15 +127,103 @@ pg_cancel_backend(PG_FUNCTION_ARGS) } /* - * Signal to terminate a backend process. This is allowed if you are a member - * of the role whose process is being terminated. + * Wait until there is no backend process with the given PID and return true. + * On timeout, a warning is emitted and false is returned. + */ +static bool +pg_wait_until_termination(int pid, int64 timeout) +{ + /* + * Wait in steps of waittime milliseconds until this function exits or + * timeout. + */ + int64 waittime = 100; + /* + * Initially remaining time is the entire timeout specified by the user. + */ + int64 remainingtime = timeout; + + /* + * Check existence of the backend. If it doesn't exist, then check for the + * errno ESRCH that confirms it and return true. If the backend still + * exists, then wait for waittime milliseconds, again check for the + * existence. Repeat this until timeout or an error occurs or a pending + * interrupt such as query cancel gets processed. + */ + do + { + if (remainingtime < waittime) + waittime = remainingtime; + + if (kill(pid, 0) == -1) + { + if (errno == ESRCH) + return true; + else + { + ereport(WARNING, + (errmsg("could not check the existence of the backend with PID %d: %m", + pid))); + return false; + } + } + + /* Process interrupts, if any, before going for wait. */ + CHECK_FOR_INTERRUPTS(); + + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + waittime, + WAIT_EVENT_BACKEND_TERMINATION); + + ResetLatch(MyLatch); + + remainingtime -= waittime; + } while (remainingtime > 0); + + ereport(WARNING, + (errmsg("could not wait for the termination of the backend with PID %d within %lld milliseconds", + pid, (long long int) timeout))); + + return false; +} + +/* + * Signal to terminate a backend process. This is allowed if you are a member + * of the role whose process is being terminated. If wait input argument is + * true, then wait until given timeout (default 100) milliseconds or no backend + * has the given PID. On timeout, a warning is emitted. Self termination is + * allowed but waiting is not, because once the backend is self-terminated + * there is no point in waiting for it to go away. If wait input argument is + * false (which is default), then this function just signals the backend and + * exits. * * Note that only superusers can signal superuser-owned processes. */ Datum pg_terminate_backend(PG_FUNCTION_ARGS) { - int r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM); + int pid; + int r; + bool wait; + int timeout; + bool result; + + pid = PG_GETARG_INT32(0); + wait = PG_GETARG_BOOL(1); + + /* If asked to wait, check whether the timeout value is valid or not. */ + if (wait && pid != MyProcPid) + { + timeout = PG_GETARG_INT64(2); + + if (timeout <= 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"timeout\" must not be negative or zero"))); + } + + r = pg_signal_backend(pid, SIGTERM); if (r == SIGNAL_BACKEND_NOSUPERUSER) ereport(ERROR, @@ -146,7 +235,53 @@ pg_terminate_backend(PG_FUNCTION_ARGS) (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend"))); - PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); + result = (r == SIGNAL_BACKEND_SUCCESS); + + /* + * Wait only if requested and the termination is successful. Self + * termination is allowed but waiting is not. + */ + if (wait && pid != MyProcPid && result) + result = pg_wait_until_termination(pid, timeout); + + PG_RETURN_BOOL(result); +} + +/* + * This function waits for a backend with the given PID to be terminated or + * until the timeout milliseconds occurs. If the PID is of the backend which + * issued this function, either timeout can occur or the function can succeed + * if the backend gets terminated by some other process, say postmaster. + */ +Datum +pg_wait_for_backend_termination(PG_FUNCTION_ARGS) +{ + int pid; + int64 timeout; + PGPROC *proc = NULL; + bool result = false; + + pid = PG_GETARG_INT32(0); + timeout = PG_GETARG_INT64(1); + + if (timeout <= 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("\"timeout\" must not be negative or zero"))); + + proc = BackendPidGetProc(pid); + + if (proc == NULL) + { + ereport(WARNING, + (errmsg("PID %d is not a PostgreSQL server process", pid))); + + PG_RETURN_BOOL(result); + } + + result = pg_wait_until_termination(pid, timeout); + + PG_RETURN_BOOL(result); } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 93393fcfd4..3f158de1dc 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6143,9 +6143,14 @@ { oid => '2171', descr => 'cancel a server process\' current query', proname => 'pg_cancel_backend', provolatile => 'v', prorettype => 'bool', proargtypes => 'int4', prosrc => 'pg_cancel_backend' }, -{ oid => '2096', descr => 'terminate a server process', +{ oid => '2096', descr => 'terminate a backend process and if specified, wait for its exit or until timeout occurs', proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool', - proargtypes => 'int4', prosrc => 'pg_terminate_backend' }, + proargtypes => 'int4 bool int8', proargnames => '{pid,wait,timeout}', + prosrc => 'pg_terminate_backend' }, +{ oid => '16386', descr => 'wait for a backend process exit or timeout occurs', + proname => 'pg_wait_for_backend_termination', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4 int8', proargnames => '{pid,timeout}', + prosrc => 'pg_wait_for_backend_termination' }, { oid => '2172', descr => 'prepare for taking an online backup', proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r', prorettype => 'pg_lsn', proargtypes => 'text bool bool', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index be43c04802..bbfe2f2f04 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -967,7 +967,8 @@ typedef enum */ typedef enum { - WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE = PG_WAIT_IPC, + WAIT_EVENT_BACKEND_TERMINATION = PG_WAIT_IPC, + WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE, WAIT_EVENT_BGWORKER_SHUTDOWN, WAIT_EVENT_BGWORKER_STARTUP, WAIT_EVENT_BTREE_PAGE, -- 2.25.1