Hi,

During the discussion about memory contexts dumping[1], there
was a comment that exposing not only memory contexts but also
query plans and untruncated query string would be useful.

I also feel that it would be nice when thinking about situations
such as troubleshooting a long-running query on production
environments where we cannot use debuggers.

At that point of the above comment, I was considering exposing
such information on the shared memory.
However, since memory contexts are now exposed on the log by
pg_log_backend_memory_contexts(PID), I'm thinking about
defining a function that logs the plan of a running query and
untruncated query string on the specified PID in the same way
as below.

  postgres=# SELECT * FROM pg_log_current_plan(2155192);
   pg_log_current_plan
  ---------------------
   t
  (1 row)

  $ tail -f data/log/postgresql-2021-05-12.log

2021-05-12 17:37:19.481 JST [2155192] LOG: logging the plan of running query on PID 2155192 Query Text: SELECT a.filler FROM pgbench_accounts a JOIN pgbench_accounts b ON a.aid = b.aid;
          Merge Join  (cost=0.85..83357.85 rows=1000000 width=85)
            Merge Cond: (a.aid = b.aid)
-> Index Scan using pgbench_accounts_pkey on pgbench_accounts a (cost=0.42..42377.43 rows=1000000 width=89) -> Index Only Scan using pgbench_accounts_pkey on pgbench_accounts b (cost=0.42..25980.42 rows=1000000 width=4)


Attached a PoC patch.

Any thoughts?

[1] https://www.postgresql.org/message-id/CA%2BTgmobkpFV0UB67kzXuD36--OFHwz1bs%3DL_6PZbD4nxKqUQMw%40mail.gmail.com


Regards,

--
Atsushi Torikoshi
NTT DATA CORPORATION
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 4d1f1794ca..5959bbfddc 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24939,6 +24939,26 @@ SELECT collation for ('foo' COLLATE "de_DE");
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_log_current_plan</primary>
+        </indexterm>
+        <function>pg_log_current_plan</function> ( <parameter>pid</parameter> <type>integer</type> )
+        <returnvalue>boolean</returnvalue>
+       </para>
+       <para>
+        Requests to log the untruncated query string and its plan of the
+        backend with the specified process ID.
+        They will be logged at <literal>LOG</literal> message level and
+        will appear in the server log based on the log
+        configuration set (See <xref linkend="runtime-config-logging"/>
+        for more information), but will not be sent to the client
+        regardless of <xref linkend="guc-client-min-messages"/>.
+        Only superusers can request to log the plan of running query.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 9867da83bc..edbe1c6829 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -21,6 +21,7 @@
 #include "executor/nodeHash.h"
 #include "foreign/fdwapi.h"
 #include "jit/jit.h"
+#include "miscadmin.h"
 #include "nodes/extensible.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -28,6 +29,8 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/bufmgr.h"
+#include "storage/procarray.h"
+#include "tcop/pquery.h"
 #include "tcop/tcopprot.h"
 #include "utils/builtins.h"
 #include "utils/guc_tables.h"
@@ -4928,3 +4931,116 @@ escape_yaml(StringInfo buf, const char *str)
 {
 	escape_json(buf, str);
 }
+
+/*
+ * HandleLogCurrentPlanInterrupt
+ *		Handle receipt of an interrupt indicating logging the
+ *		plan of a running query.
+ *
+ * All the actual work is deferred to ProcessLogExplainInterrupt(),
+ * because we cannot safely emit a log message inside the signal handler.
+ */
+void
+HandleLogCurrentPlanInterrupt(void)
+{
+	InterruptPending = true;
+	LogCurrentPlanPending = true;
+	/* latch will be set by procsignal_sigusr1_handler */
+}
+
+/*
+ * ProcessLogCurrentPlanInterrupt
+ * 		Perform logging the plan of running query on this
+ * 		backend process.
+ *
+ * Any backend that participates in ProcSignal signaling must arrange
+ * to call this function if we see LogCurrentPlanPending set.
+ * It is called from CHECK_FOR_INTERRUPTS(), which is enough because
+ * the target process for logging the plan of running query.
+ */
+void
+ProcessLogCurrentPlanInterrupt(void)
+{
+	ExplainState *es = NewExplainState();
+
+	LogCurrentPlanPending = false;
+
+	es->format = EXPLAIN_FORMAT_TEXT;
+	es->summary = true;
+
+	if (ActivePortal && ActivePortal->queryDesc != NULL)
+	{
+		ExplainQueryText(es, ActivePortal->queryDesc);
+		ExplainPrintPlan(es, ActivePortal->queryDesc);
+
+		/* Remove last line break */
+		if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n')
+			es->str->data[--es->str->len] = '\0';
+
+		ereport(LOG,
+				(errmsg("logging the plan of running query on PID %d\n%s",
+					MyProcPid,
+					es->str->data),
+				 errhidestmt(true)));
+	}
+	else
+		ereport(LOG,
+				(errmsg("PID %d is not running a query now",
+					MyProcPid)));
+}
+
+/*
+ * pg_log_current_plan
+ *		Signal a backend process to log the plan of running
+ *		query.
+ *
+ * Only superusers are allowed to signal to log the plan because
+ * allowing any users to issue this request at an unbounded rate
+ * would cause lots of log messages and which can lead to denial
+ * of service.
+ *
+ * On receipt of this signal, a backend sets the flag in the signal
+ * handler, which causes the next CHECK_FOR_INTERRUPTS() to log the
+ * plan.
+ */
+Datum
+pg_log_current_plan(PG_FUNCTION_ARGS)
+{
+	int			pid = PG_GETARG_INT32(0);
+	PGPROC	   *proc = BackendPidGetProc(pid);
+
+	/*
+	 * BackendPidGetProc returns NULL if the pid isn't valid; but by the time
+	 * we reach kill(), a process for which we get a valid proc here might
+	 * have terminated on its own.  There's no way to acquire a lock on an
+	 * arbitrary process to prevent that. But since this mechanism is usually
+	 * used to look into the plan of long running query, that it might end on
+	 * its own first and its plan is not logged is not a problem.
+	 */
+	if (proc == NULL)
+	{
+		/*
+		 * This is just a warning so a loop-through-resultset will not abort
+		 * if one backend terminated on its own during the run.
+		 */
+		ereport(WARNING,
+				(errmsg("PID %d is not a PostgreSQL server process", pid)));
+		PG_RETURN_BOOL(false);
+	}
+
+	/* Only allow superusers to log the plan of running query. */
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("must be a superuser to log the plan of running query")));
+
+	if (SendProcSignal(pid, PROCSIG_LOG_CURRENT_PLAN, proc->backendId) < 0)
+	{
+		/* Again, just a warning to allow loops */
+		ereport(WARNING,
+				(errmsg("could not send signal to process %d: %m", pid)));
+		PG_RETURN_BOOL(false);
+	}
+
+	PG_RETURN_BOOL(true);
+}
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index eac6895141..3aaf4b4818 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -20,6 +20,7 @@
 #include "access/parallel.h"
 #include "port/pg_bitutils.h"
 #include "commands/async.h"
+#include "commands/explain.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "replication/walsender.h"
@@ -661,6 +662,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT))
 		HandleLogMemoryContextInterrupt();
 
+	if (CheckProcSignal(PROCSIG_LOG_CURRENT_PLAN))
+		HandleLogCurrentPlanInterrupt();
+
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
 
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2d6d145ecc..f456f0b7d5 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -41,6 +41,7 @@
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/async.h"
+#include "commands/explain.h"
 #include "commands/prepare.h"
 #include "executor/spi.h"
 #include "jit/jit.h"
@@ -3356,6 +3357,9 @@ ProcessInterrupts(void)
 
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
+
+	if (LogCurrentPlanPending)
+		ProcessLogCurrentPlanInterrupt();
 }
 
 
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 381d9e548d..126ed10362 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -36,6 +36,7 @@ volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
 volatile sig_atomic_t IdleSessionTimeoutPending = false;
 volatile sig_atomic_t ProcSignalBarrierPending = false;
 volatile sig_atomic_t LogMemoryContextPending = false;
+volatile sig_atomic_t LogCurrentPlanPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 26c3fc0f6b..461c1efe91 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7975,6 +7975,12 @@
   provolatile => 'v', prorettype => 'bool',
   proargtypes => 'int4', prosrc => 'pg_log_backend_memory_contexts' },
 
+# logging the plan of running query on the specified backend
+{ oid => '4544', descr => 'log the plan of running query on the specified backend',
+  proname => 'pg_log_current_plan',
+  provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'int4', prosrc => 'pg_log_current_plan' },
+
 # non-persistent series generator
 { oid => '1066', descr => 'non-persistent series generator',
   proname => 'generate_series', prorows => '1000',
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index e94d9e49cf..fa81beb553 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -124,4 +124,6 @@ extern void ExplainOpenGroup(const char *objtype, const char *labelname,
 extern void ExplainCloseGroup(const char *objtype, const char *labelname,
 							  bool labeled, ExplainState *es);
 
+extern void HandleLogCurrentPlanInterrupt(void);
+extern void ProcessLogCurrentPlanInterrupt(void);
 #endif							/* EXPLAIN_H */
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 95202d37af..8c03a515fd 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
 extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending;
+extern PGDLLIMPORT volatile sig_atomic_t LogCurrentPlanPending;
 
 extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
 extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index eec186be2e..a80e7999e8 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -35,6 +35,7 @@ typedef enum
 	PROCSIG_WALSND_INIT_STOPPING,	/* ask walsenders to prepare for shutdown  */
 	PROCSIG_BARRIER,			/* global barrier interrupt  */
 	PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */
+	PROCSIG_LOG_CURRENT_PLAN, /* ask backend to log the plan of current query */
 
 	/* Recovery conflict reasons */
 	PROCSIG_RECOVERY_CONFLICT_DATABASE,

Reply via email to