On Mon, Mar 15, 2021 at 10:38 AM Fujii Masao
<masao.fu...@oss.nttdata.com> wrote:
> On 2021/03/15 12:27, Bharath Rupireddy wrote:
> > On Sun, Mar 7, 2021 at 2:39 PM Bharath Rupireddy
> > <bharath.rupireddyforpostg...@gmail.com> wrote:
> >> Attaching v7 patch for further review.
> >
> > Attaching v8 patch after rebasing on to the latest master.
>
> Thanks for rebasing the patch!

Thanks for reviewing.

> -       WAIT_EVENT_XACT_GROUP_UPDATE
> +       WAIT_EVENT_XACT_GROUP_UPDATE,
> +       WAIT_EVENT_BACKEND_TERMINATION
>
> These should be listed in alphabetical order.

Done.

> In pg_wait_until_termination's do-while loop, ResetLatch() should be called. 
> Otherwise, it would enter busy-loop after any signal arrives. Because the 
> latch is kept set and WaitLatch() always exits immediately in that case.

Done.

> +       /*
> +        * Wait in steps of waittime milliseconds until this function exits or
> +        * timeout.
> +        */
> +       int64   waittime = 10;
>
> 10 ms per cycle seems too frequent?

Increased it to 100msec.

> +                       ereport(WARNING,
> +                                       (errmsg("timeout cannot be negative 
> or zero: %lld",
> +                                                       (long long int) 
> timeout)));
> +
> +                       result = false;
>
> IMO the parameter should be verified before doing the actual thing.

Done.

> Why is WARNING thrown in this case? Isn't it better to throw ERROR like 
> pg_promote() does?

Done.

Attaching v9 patch for further review.

With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com
From ddcf3f3f4a3dbabdf558b828c5728dfbbdc13a31 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddy@enterprisedb.com>
Date: Tue, 16 Mar 2021 14:46:41 +0530
Subject: [PATCH v9] pg_terminate_backend with wait and timeout

This patch adds extra default arguments wait and timeout
to pg_terminate_backend which will be used to terminate
and wait timeout milliseconds for the backend's termination.

This patch also adds a new function
pg_wait_for_backend_termination(pid, timeout) which checks
for the existence of the backend with given PID and waits
timeout milliseconds for its termination.
---
 doc/src/sgml/func.sgml                |  31 +++++-
 doc/src/sgml/monitoring.sgml          |   4 +
 src/backend/catalog/system_views.sql  |  10 ++
 src/backend/postmaster/pgstat.c       |   3 +
 src/backend/storage/ipc/signalfuncs.c | 143 +++++++++++++++++++++++++-
 src/include/catalog/pg_proc.dat       |   9 +-
 src/include/pgstat.h                  |   3 +-
 7 files changed, 194 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9492a3c6b9..0a417a0b2b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24821,7 +24821,7 @@ SELECT collation for ('foo' COLLATE "de_DE");
         <indexterm>
          <primary>pg_terminate_backend</primary>
         </indexterm>
-        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type> )
+        <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>wait</parameter> <type>boolean</type> <literal>DEFAULT</literal> <literal>false</literal>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
         <returnvalue>boolean</returnvalue>
        </para>
        <para>
@@ -24829,7 +24829,34 @@ SELECT collation for ('foo' COLLATE "de_DE");
         specified process ID.  This is also allowed if the calling role
         is a member of the role whose backend is being terminated or the
         calling role has been granted <literal>pg_signal_backend</literal>,
-        however only superusers can terminate superuser backends.
+        however only superusers can terminate superuser backends. When no
+        <parameter>wait</parameter> and <parameter>timeout</parameter> are
+        specified, this function sends SIGTERM to the backend with the given
+        process ID without waiting for its actual termination and returns <literal>true</literal>.
+        Sometimes the backend process may still exist. With <parameter>wait</parameter>
+        specified as <literal>true</literal>, the function ensures that the
+        backend process is actually terminated. It keeps checking for the
+        existence of the backend process either until the given <parameter>timeout</parameter>
+        or default 100 milliseconds and returns <literal>true</literal>. On
+        timeout a warning is emitted and <literal>false</literal> is returned.
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_wait_for_backend_termination</primary>
+        </indexterm>
+        <function>pg_wait_for_backend_termination</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Checks the existence of backend process with specified process ID. It
+        waits until the given <parameter>timeout</parameter> or the default 100
+        milliseconds. <literal>true</literal> is returned when the backend is
+        found to be not existing within the <parameter>timeout</parameter> milliseconds.
+        On timeout, a warning is emitted and <literal>false</literal> is
+        returned.
        </para></entry>
       </row>
      </tbody>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index db4b4e460c..f362184d5e 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1569,6 +1569,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
     </thead>
 
     <tbody>
+    <row>
+      <entry><literal>BackendTermination</literal></entry>
+      <entry>Waiting for the termination of a backend.</entry>
+     </row>
      <row>
       <entry><literal>BackupWaitWalArchive</literal></entry>
       <entry>Waiting for WAL files required for a backup to be successfully
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0dca65dc7b..85521546e3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1277,6 +1277,16 @@ CREATE OR REPLACE FUNCTION
   RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
   PARALLEL SAFE;
 
+CREATE OR REPLACE FUNCTION
+  pg_terminate_backend(pid integer, wait boolean DEFAULT false, timeout int8 DEFAULT 100)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend'
+  PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION
+  pg_wait_for_backend_termination(pid integer, timeout int8 DEFAULT 100)
+  RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_wait_for_backend_termination'
+  PARALLEL SAFE;
+
 -- legacy definition for compatibility with 9.3
 CREATE OR REPLACE FUNCTION
   json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b1e2d94951..2f72241d2f 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4130,6 +4130,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
 		case WAIT_EVENT_XACT_GROUP_UPDATE:
 			event_name = "XactGroupUpdate";
 			break;
+		case WAIT_EVENT_BACKEND_TERMINATION:
+			event_name = "BackendTermination";
+			break;
 			/* no default case, so that compiler will warn */
 	}
 
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index 69fe23a256..8d879af47c 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -18,6 +18,7 @@
 
 #include "catalog/pg_authid.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/syslogger.h"
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
@@ -126,15 +127,103 @@ pg_cancel_backend(PG_FUNCTION_ARGS)
 }
 
 /*
- * Signal to terminate a backend process.  This is allowed if you are a member
- * of the role whose process is being terminated.
+ * Wait until there is no backend process with the given PID and return true.
+ * On timeout, a warning is emitted and false is returned.
+ */
+static bool
+pg_wait_until_termination(int pid, int64 timeout)
+{
+	/*
+	 * Wait in steps of waittime milliseconds until this function exits or
+	 * timeout.
+	 */
+	int64	waittime = 100;
+	/*
+	 * Initially remaining time is the entire timeout specified by the user.
+	 */
+	int64	remainingtime = timeout;
+
+	/*
+	 * Check existence of the backend. If it doesn't exist, then check for the
+	 * errno ESRCH that confirms it and return true. If the backend still
+	 * exists, then wait for waittime milliseconds, again check for the
+	 * existence. Repeat this until timeout or an error occurs or a pending
+	 * interrupt such as query cancel gets processed.
+	 */
+	do
+	{
+		if (remainingtime < waittime)
+			waittime = remainingtime;
+
+		if (kill(pid, 0) == -1)
+		{
+			if (errno == ESRCH)
+				return true;
+			else
+			{
+				ereport(WARNING,
+						(errmsg("could not check the existence of the backend with PID %d: %m",
+								pid)));
+				return false;
+			}
+		}
+
+		/* Process interrupts, if any, before going for wait. */
+		CHECK_FOR_INTERRUPTS();
+
+		(void) WaitLatch(MyLatch,
+						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+						 waittime,
+						 WAIT_EVENT_BACKEND_TERMINATION);
+
+		ResetLatch(MyLatch);
+
+		remainingtime -= waittime;
+	} while (remainingtime > 0);
+
+	ereport(WARNING,
+			(errmsg("could not wait for the termination of the backend with PID %d within %lld milliseconds",
+					pid, (long long int) timeout)));
+
+	return false;
+}
+
+/*
+ * Signal to terminate a backend process. This is allowed if you are a member
+ * of the role whose process is being terminated. If wait input argument is
+ * true, then wait until given timeout (default 100) milliseconds or no backend
+ * has the given PID. On timeout, a warning is emitted. Self termination is
+ * allowed but waiting is not, because once the backend is self-terminated
+ * there is no point in waiting for it to go away. If wait input argument is
+ * false (which is default), then this function just signals the backend and
+ * exits.
  *
  * Note that only superusers can signal superuser-owned processes.
  */
 Datum
 pg_terminate_backend(PG_FUNCTION_ARGS)
 {
-	int			r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM);
+	int	 pid;
+	int	 r;
+	bool wait;
+	int timeout;
+	bool result;
+
+	pid = PG_GETARG_INT32(0);
+	wait = PG_GETARG_BOOL(1);
+
+	/* If asked to wait, check whether the timeout value is valid or not. */
+	if (wait && pid != MyProcPid)
+	{
+		timeout = PG_GETARG_INT64(2);
+
+		if (timeout <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"timeout\" must not be negative or zero")));
+	}
+
+	r = pg_signal_backend(pid, SIGTERM);
 
 	if (r == SIGNAL_BACKEND_NOSUPERUSER)
 		ereport(ERROR,
@@ -146,7 +235,53 @@ pg_terminate_backend(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 				 errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
 
-	PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS);
+	result = (r == SIGNAL_BACKEND_SUCCESS);
+
+	/*
+	 * Wait only if requested and the termination is successful. Self
+	 * termination is allowed but waiting is not.
+	 */
+	if (wait && pid != MyProcPid && result)
+		result = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(result);
+}
+
+/*
+ * This function waits for a backend with the given PID to be terminated or
+ * until the timeout milliseconds occurs. If the PID is of the backend which
+ * issued this function, either timeout can occur or the function can succeed
+ * if the backend gets terminated by some other process, say postmaster.
+ */
+Datum
+pg_wait_for_backend_termination(PG_FUNCTION_ARGS)
+{
+	int	 pid;
+	int64	timeout;
+	PGPROC	*proc = NULL;
+	bool	result = false;
+
+	pid = PG_GETARG_INT32(0);
+	timeout = PG_GETARG_INT64(1);
+
+	if (timeout <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				errmsg("\"timeout\" must not be negative or zero")));
+
+	proc = BackendPidGetProc(pid);
+
+	if (proc == NULL)
+	{
+		ereport(WARNING,
+				(errmsg("PID %d is not a PostgreSQL server process", pid)));
+
+		PG_RETURN_BOOL(result);
+	}
+
+	result = pg_wait_until_termination(pid, timeout);
+
+	PG_RETURN_BOOL(result);
 }
 
 /*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 93393fcfd4..3f158de1dc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6143,9 +6143,14 @@
 { oid => '2171', descr => 'cancel a server process\' current query',
   proname => 'pg_cancel_backend', provolatile => 'v', prorettype => 'bool',
   proargtypes => 'int4', prosrc => 'pg_cancel_backend' },
-{ oid => '2096', descr => 'terminate a server process',
+{ oid => '2096', descr => 'terminate a backend process and if specified, wait for its exit or until timeout occurs',
   proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool',
-  proargtypes => 'int4', prosrc => 'pg_terminate_backend' },
+  proargtypes => 'int4 bool int8', proargnames => '{pid,wait,timeout}',
+  prosrc => 'pg_terminate_backend' },
+{ oid => '16386', descr => 'wait for a backend process exit or timeout occurs',
+  proname => 'pg_wait_for_backend_termination', provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+  prosrc => 'pg_wait_for_backend_termination' },
 { oid => '2172', descr => 'prepare for taking an online backup',
   proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r',
   prorettype => 'pg_lsn', proargtypes => 'text bool bool',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index be43c04802..bbfe2f2f04 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -967,7 +967,8 @@ typedef enum
  */
 typedef enum
 {
-	WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE = PG_WAIT_IPC,
+	WAIT_EVENT_BACKEND_TERMINATION = PG_WAIT_IPC,
+	WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE,
 	WAIT_EVENT_BGWORKER_SHUTDOWN,
 	WAIT_EVENT_BGWORKER_STARTUP,
 	WAIT_EVENT_BTREE_PAGE,
-- 
2.25.1

Reply via email to