On 10/05/18 09:32, Hubert Zhang wrote:
Hi all,

I want to support canceling for a plpython query which may be a busy loop.

I found some discussions on pgsql-hackers 2 years ago. Below is the link.

https://www.postgresql.org/message-id/cafywgj3+xg7ecl2nu-mxx6p+o6c895pm3myz-b+9n9dffeh...@mail.gmail.com

Mario wrote a patch to fix this problem at that time
*https://github.com/CartoDB/postgres/commit/d587d8a6e4f035cc45e1d84fc46aa7c3ab0344c3
<https://github.com/CartoDB/postgres/commit/d587d8a6e4f035cc45e1d84fc46aa7c3ab0344c3>*
<https://github.com/CartoDB/postgres/commit/d587d8a6e4f035cc45e1d84fc46aa7c3ab0344c3>

The main logic is to register a new signal handler for SIGINT/SIGTERM
and link the old signal handler in the chain.

static void PLy_python_interruption_handler()
{
PyErr_SetString(PyExc_RuntimeError, "test except");
return NULL;
}
static void
PLy_handle_interrupt(int sig)
{
// custom interruption
int added = Py_AddPendingCall(PLy_python_interruption_handler, NULL);
if (coreIntHandler) {
(*coreIntHandler)(sig);
}
}

Does anyone have some comments on this patch?

PostgreSQL assumes to have control of all the signals. Although I don't foresee any changes in this area any time soon, there's no guarantee that overriding the SIGINT/SIGTERM will do what you want in the future. Also, catching SIGINT/SIGTERM still won't react to recovery conflict interrupts.

In that old thread, I think the conclusion was that we should provide a hook in the backend for this, rather than override the signal handler directly. We could then call the hook whenever InterruptPending is set. No-one got around to write a patch to do that, though.

As for me, I think handler function should call PyErr_SetInterrupt()
instead of PyErr_SetString(PyExc_RuntimeError, "test except");

Hmm. I tested that, but because the Python's default SIGINT handler is not installed, PyErr_SetInterrupt() will actually throw an error:

TypeError: 'NoneType' object is not callable

I came up with the attached patch, which is similar to Mario's, but it introduces a new "hook" for this.

One little problem remains: The Py_AddPendingCall() call is made unconditionally in the signal handler, even if no Python code is currently being executed. The pending call is queued up until the next time you run a PL/Python function, which could be long after the original statement was canceled. We need some extra checks to only raise the Python exception, if the Python interpreter is currently active.

- Heikki
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index f4133953be..9a8255ab1a 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -100,6 +100,22 @@ int			max_stack_depth = 100;
 /* wait N seconds to allow attach from a debugger */
 int			PostAuthDelay = 0;
 
+/*
+ * Hook for extensions, to get notified when query cancel or DIE signal is
+ * received. This allows the extension to stop whatever it's doing as
+ * quickly as possible. Normally, you would sprinkle your code with
+ * CHECK_FOR_INTERRUPTS() in suitable places, but sometimes that's not
+ * possible, for example because you call a slow function in a 3rd party
+ * library that you have no control over. In the hook function, you might
+ * be able to abort such a slow operation somehow.
+ *
+ * This gets called after setting ProcDiePending, QueryCancelPending and/or
+ * RecoveryConfictPending, so the hook function can check those to
+ * determine what event happened.
+ *
+ * NB: This is called from a signal handler!
+ */
+cancel_pending_hook_type cancel_pending_hook = NULL;
 
 
 /* ----------------
@@ -2664,6 +2680,9 @@ die(SIGNAL_ARGS)
 	{
 		InterruptPending = true;
 		ProcDiePending = true;
+
+		if (cancel_pending_hook)
+			(*cancel_pending_hook)();
 	}
 
 	/* If we're still here, waken anything waiting on the process latch */
@@ -2697,6 +2716,9 @@ StatementCancelHandler(SIGNAL_ARGS)
 	{
 		InterruptPending = true;
 		QueryCancelPending = true;
+
+		if (cancel_pending_hook)
+			(*cancel_pending_hook)();
 	}
 
 	/* If we're still here, waken anything waiting on the process latch */
@@ -2819,6 +2841,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
 					RecoveryConflictPending = true;
 					QueryCancelPending = true;
 					InterruptPending = true;
+					if (cancel_pending_hook)
+						(*cancel_pending_hook)();
 					break;
 				}
 
@@ -2829,6 +2853,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
 				RecoveryConflictPending = true;
 				ProcDiePending = true;
 				InterruptPending = true;
+				if (cancel_pending_hook)
+					(*cancel_pending_hook)();
 				break;
 
 			default:
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index e167ee8fcb..f6648221ca 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -93,6 +93,10 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount;
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);
 
+/* Hook get notified when QueryCancelPending or ProcDiePending is raised */
+typedef void (*cancel_pending_hook_type) (void);
+extern PGDLLIMPORT cancel_pending_hook_type cancel_pending_hook;
+
 #ifndef WIN32
 
 #define CHECK_FOR_INTERRUPTS() \
diff --git a/src/pl/plpython/plpy_elog.c b/src/pl/plpython/plpy_elog.c
index e244104fed..07eac036ad 100644
--- a/src/pl/plpython/plpy_elog.c
+++ b/src/pl/plpython/plpy_elog.c
@@ -7,6 +7,7 @@
 #include "postgres.h"
 
 #include "lib/stringinfo.h"
+#include "miscadmin.h"
 
 #include "plpython.h"
 
@@ -120,6 +121,15 @@ PLy_elog_impl(int elevel, const char *fmt,...)
 
 	PG_TRY();
 	{
+		/*
+		 * If the error was a KeyboardException that we raised because
+		 * of query cancellation, then CHECK_FOR_INTERRUPTS() will throw
+		 * a better error message than we do here, with
+		 * "canceling statement due to user request" or similar message.
+		 * Give it a chance.
+		 */
+		CHECK_FOR_INTERRUPTS();
+
 		ereport(elevel,
 				(errcode(sqlerrcode ? sqlerrcode : ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
 				 errmsg_internal("%s", primary ? primary : "no exception data"),
diff --git a/src/pl/plpython/plpy_main.c b/src/pl/plpython/plpy_main.c
index 6a66eba176..f11b0d2f6e 100644
--- a/src/pl/plpython/plpy_main.c
+++ b/src/pl/plpython/plpy_main.c
@@ -73,6 +73,9 @@ PyObject   *PLy_interp_globals = NULL;
 /* this doesn't need to be global; use PLy_current_execution_context() */
 static PLyExecutionContext *PLy_execution_contexts = NULL;
 
+static cancel_pending_hook_type prev_cancel_pending_hook;
+
+static void PLy_handle_cancel_interrupt(void);
 
 void
 _PG_init(void)
@@ -141,6 +144,10 @@ PLy_initialize(void)
 	if (PyErr_Occurred())
 		PLy_elog(FATAL, "untrapped error in initialization");
 
+	/* Get notified on SIGTERM or query cancellations */
+	prev_cancel_pending_hook = cancel_pending_hook;
+	cancel_pending_hook = PLy_handle_cancel_interrupt;
+
 	init_procedure_caches();
 
 	explicit_subtransactions = NIL;
@@ -464,3 +471,37 @@ PLy_pop_execution_context(void)
 		MemoryContextDelete(context->scratch_ctx);
 	pfree(context);
 }
+
+/*
+ * Raise a KeyboardInterrupt exception, to simulate a SIGINT.
+ */
+static int
+PLy_python_cancel_handler(void *arg)
+{
+	PyErr_SetNone(PyExc_KeyboardInterrupt);
+
+	/* return -1 to indicate that we set an exception. */
+	return -1;
+}
+
+/*
+ * Hook function, called when current query is being cancelled
+ * (on e.g. SIGINT or SIGTERM)
+ *
+ * NB: This is called from a signal handler!
+ */
+static void
+PLy_handle_cancel_interrupt(void)
+{
+	/*
+	 * We can't do much in a signal handler, so just tell the Python
+	 * interpreter to call us back when possible.
+	 *
+	 * We don't bother to check the return value, as there's nothing we could
+	 * do if it fails for some reason.
+	 */
+	(void) Py_AddPendingCall(PLy_python_cancel_handler, NULL);
+
+	if (prev_cancel_pending_hook)
+		prev_cancel_pending_hook();
+}

Reply via email to