On Thu, Jan 21, 2021 at 7:26 AM Tom Lane <t...@sss.pgh.pa.us> wrote: > > Craig Ringer <craig.rin...@enterprisedb.com> writes: > > On Wed, 20 Jan 2021 at 05:23, Tom Lane <t...@sss.pgh.pa.us> wrote: > >> BTW, it also looks like the patch is doing nothing to prevent the > >> backtrace from being sent to the connected client. > > > I don't see a good reason to send a bt to a client. Even though these > > backtraces won't be analysing debuginfo and populating args, locals, etc, > > it should still just go to the server log. > > Yeah. That's easier than I was thinking, we just need to > s/LOG/LOG_SERVER_ONLY/. > > >> Maybe, given all of these things, we should forget using elog at > >> all and just emit the trace with fprintf(stderr). > > > That causes quite a lot of pain with MemoryContextStats() already > > True. Given the changes discussed in the last couple messages, I don't > see any really killer reasons why we can't ship the trace through elog. > We can always try that first, and back off to fprintf if we do find > reasons why it's too unstable. >
Thanks all of them for the suggestions. Attached v3 patch which has fixes based on the suggestions. It includes the following fixes: 1) Removal of support to get callstack of all postgres process, user can get only one process callstack. 2) Update the documentation. 3) Added necessary checks for pg_print_callstack similar to pg_terminate_backend. 4) Changed LOG to LOG_SERVER_ONLY. Thoughts? Regards, Vignesh
From dd51938225881be14cce2ba0e80ae9019a3f2bfc Mon Sep 17 00:00:00 2001 From: Vignesh C <vignes...@enterprisedb.com> Date: Wed, 27 Jan 2021 18:20:13 +0530 Subject: [PATCH v3] Print backtrace of postgres process that are part of this instance. The idea here is to implement & expose pg_print_callstack function, internally what this function does is, the connected backend will send SIGUSR1 signal by setting PMSIGNAL_BACKTRACE_EMIT to the postmaster process. Once the process receives this signal it will log the backtrace of the process. --- doc/src/sgml/func.sgml | 75 +++++++++++++++++++++++++++++++++++ src/backend/postmaster/autovacuum.c | 4 ++ src/backend/postmaster/checkpointer.c | 5 +++ src/backend/postmaster/interrupt.c | 5 +++ src/backend/storage/ipc/procsignal.c | 33 +++++++++++++++ src/backend/storage/ipc/signalfuncs.c | 59 ++++++++++++++++++++++++++- src/backend/tcop/postgres.c | 38 ++++++++++++++++++ src/backend/utils/init/globals.c | 1 + src/bin/pg_ctl/t/005_backtrace.pl | 73 ++++++++++++++++++++++++++++++++++ src/include/catalog/pg_proc.dat | 5 +++ src/include/miscadmin.h | 2 + src/include/storage/pmsignal.h | 2 + src/include/storage/procsignal.h | 3 ++ src/include/tcop/tcopprot.h | 1 + 14 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 src/bin/pg_ctl/t/005_backtrace.pl diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index aa99665..4ff6e7f 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -24709,6 +24709,25 @@ SELECT collation for ('foo' COLLATE "de_DE"); however only superusers can terminate superuser backends. </para></entry> </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_print_callstack</primary> + </indexterm> + <function>pg_print_callstack</function> ( <parameter>pid</parameter> <type>integer</type> ) + <returnvalue>boolean</returnvalue> + </para> + <para> + Prints the callstack whose backend process has the specified process ID. + Callstack will be printed based on the log configuration set. See + <xref linkend="runtime-config-logging"/> for more information. This is + allowed if the calling role is a member of the role whose backend + callstack is being printed or the calling role has been granted + <literal>pg_print_callstack</literal>, however only superusers can + print callstack of superuser backends. + </para></entry> + </row> </tbody> </tgroup> </table> @@ -24728,6 +24747,62 @@ SELECT collation for ('foo' COLLATE "de_DE"); <structname>pg_stat_activity</structname> view. </para> + <para> + <function>pg_print_callstack</function> can be used to print callstack of + a backend porcess. For example: +<programlisting> +postgres=# select pg_print_callstack(pg_backend_pid()); + pg_print_callstack +-------------------- + t +(1 row) + +The callstack will be logged in the log file. For example: +2021-01-27 11:33:50.247 IST [111735] LOG: current backtrace: + postgres: postgresdba postgres [local] SELECT(LogBackTrace+0x33) [0x9501cd] + postgres: postgresdba postgres [local] SELECT(ProcessInterrupts+0x774) [0x950bac] + postgres: postgresdba postgres [local] SELECT() [0x761ee8] + postgres: postgresdba postgres [local] SELECT() [0x71bc39] + postgres: postgresdba postgres [local] SELECT() [0x71e3df] + postgres: postgresdba postgres [local] SELECT(standard_ExecutorRun+0x1d6) [0x71c25d] + postgres: postgresdba postgres [local] SELECT(ExecutorRun+0x55) [0x71c085] + postgres: postgresdba postgres [local] SELECT() [0x953f3d] + postgres: postgresdba postgres [local] SELECT(PortalRun+0x262) [0x953bf6] + postgres: postgresdba postgres [local] SELECT() [0x94dafa] + postgres: postgresdba postgres [local] SELECT(PostgresMain+0x7d7) [0x951dea] + postgres: postgresdba postgres [local] SELECT() [0x896a7b] + postgres: postgresdba postgres [local] SELECT() [0x896401] + postgres: postgresdba postgres [local] SELECT() [0x8929c5] + postgres: postgresdba postgres [local] SELECT(PostmasterMain+0x116b) [0x89229c] + postgres: postgresdba postgres [local] SELECT() [0x795efa] + /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f6c12697505] + postgres: postgresdba postgres [local] SELECT() [0x484229] + +</programlisting> + You can get the file name and line number by using: +<programlisting> +1) "info line *address" from gdb on postgres executable. For example: +gdb ./postgres +GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-115.el7 +Copyright (C) 2013 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later <literal><</literal>http://gnu.org/licenses/gpl.html<literal>></literal> +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. Type "show copying" +and "show warranty" for details. +This GDB was configured as "x86_64-redhat-linux-gnu". +For bug reporting instructions, please see: +<literal><</literal>http://www.gnu.org/software/gdb/bugs/<literal>></literal>... +Reading symbols from /home/vignesh/postgres/postgres/inst/bin/postgres...done. +(gdb) info line *0x71c25d +Line 378 of "execMain.c" starts at address 0x71c25d <literal><</literal>standard_ExecutorRun+470<literal>></literal> and ends at 0x71c263 <literal><</literal>standard_ExecutorRun+476<literal>></literal>. +OR +2) Using "addr2line -e postgres address", For exmple: +addr2line -e ./postgres 0x71c25d +/home/vignesh/postgres/postgres/src/backend/executor/execMain.c:378 +</programlisting> +</para> + + </sect2> <sect2 id="functions-admin-backup"> diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 47e60ca..aa87eda 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -836,6 +836,10 @@ HandleAutoVacLauncherInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 54a818b..6655de3 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -57,6 +57,7 @@ #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" @@ -547,6 +548,10 @@ HandleCheckpointerInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + if (ConfigReloadPending) { ConfigReloadPending = false; diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index dd9136a..a6397ab 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -21,6 +21,7 @@ #include "storage/ipc.h" #include "storage/latch.h" #include "storage/procsignal.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" volatile sig_atomic_t ConfigReloadPending = false; @@ -41,6 +42,10 @@ HandleMainLoopInterrupts(void) ProcessConfigFile(PGC_SIGHUP); } + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); + if (ShutdownRequestPending) proc_exit(0); } diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index c43cdd6..b0479aa 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -308,6 +308,21 @@ SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId) } /* + * EmitProcSignalPrintCallStack + * + * Send SIGUSR1 to postgres backend whose pid matches bt_pid by setting + * PROCSIG_BACKTRACE_PRINT, the postgres processes will print the backtrace once + * the signal is received. + */ +bool +EmitProcSignalPrintCallStack(int bt_pid) +{ + int ret = SendProcSignal(bt_pid, PROCSIG_BACKTRACE_PRINT, InvalidBackendId); + + return (ret == 0); +} + +/* * EmitProcSignalBarrier * Send a signal to every Postgres process * @@ -453,6 +468,21 @@ HandleProcSignalBarrierInterrupt(void) } /* + * Handle receipt of an print backtrace. + * + * Note: this is called within a signal handler! All we can do is set + * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke + * LogBackTrace(). + */ +static void +HandlePrintBacktraceInterrupt(void) +{ + InterruptPending = true; + PrintBacktracePending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* * Perform global barrier related interrupt checking. * * Any backend that participates in ProcSignal signaling must arrange to @@ -686,6 +716,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_BACKTRACE_PRINT)) + HandlePrintBacktraceInterrupt(); + SetLatch(MyLatch); latch_sigusr1_handler(); diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 69fe23a..7b0e1bc 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -22,6 +22,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -44,8 +45,9 @@ #define SIGNAL_BACKEND_ERROR 1 #define SIGNAL_BACKEND_NOPERMISSION 2 #define SIGNAL_BACKEND_NOSUPERUSER 3 + static int -pg_signal_backend(int pid, int sig) +check_valid_pid(int pid) { PGPROC *proc = BackendPidGetProc(pid); @@ -77,6 +79,17 @@ pg_signal_backend(int pid, int sig) !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID)) return SIGNAL_BACKEND_NOPERMISSION; + return 0; +} + +static int +pg_signal_backend(int pid, int sig) +{ + int ret = check_valid_pid(pid); + + if (ret) + return ret; + /* * Can the process we just validated above end, followed by the pid being * recycled for a new process, before reaching here? Then we'd be trying @@ -215,3 +228,47 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS) SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); } + +/* + * pg_print_callstack - print callstack of backend process. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_print_callstack(PG_FUNCTION_ARGS) +{ + int bt_pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0); + +#ifdef HAVE_BACKTRACE_SYMBOLS + { + int r = check_valid_pid(bt_pid); + + if (r == SIGNAL_BACKEND_NOSUPERUSER) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a superuser to print backtrace of superuser query process"))); + + if (r == SIGNAL_BACKEND_NOPERMISSION) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a member of the role whose query process's backtrace is being logged or member of pg_signal_backend"))); + + if (r) + PG_RETURN_BOOL(false); + + if (EmitProcSignalPrintCallStack(bt_pid)) + PG_RETURN_BOOL(true); + + ereport(WARNING, + (errmsg("failed to send signal to postmaster: %m"))); + PG_RETURN_BOOL(false); + } +#else + { + ereport(WARNING, + (errmsg("backtrace generation is not supported by this installation"))); + PG_RETURN_BOOL(false); + } +#endif +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index cb5a961..a6f0ad3 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include <execinfo.h> #include <fcntl.h> #include <limits.h> #include <signal.h> @@ -2921,6 +2922,39 @@ FloatExceptionHandler(SIGNAL_ARGS) } /* + * LogBackTrace + * + * Get the backtrace and log the backtrace to log file. + */ +void +LogBackTrace(void) +{ + int save_errno = errno; + + void *buf[100]; + int nframes; + char **strfrms; + StringInfoData errtrace; + + /* OK to process messages. Reset the flag saying there are more to do. */ + PrintBacktracePending = false; + + nframes = backtrace(buf, lengthof(buf)); + strfrms = backtrace_symbols(buf, nframes); + if (strfrms == NULL) + return; + + initStringInfo(&errtrace); + for (int i = 0; i < nframes; i++) + appendStringInfo(&errtrace, "\n%s", strfrms[i]); + free(strfrms); + + elog(LOG_SERVER_ONLY, "current backtrace:%s", errtrace.data); + + errno = save_errno; +} + +/* * RecoveryConflictInterrupt: out-of-line portion of recovery conflict * handling following receipt of SIGUSR1. Designed to be similar to die() * and StatementCancelHandler(). Called only by a normal user backend @@ -3274,6 +3308,10 @@ ProcessInterrupts(void) if (ParallelMessagePending) HandleParallelMessages(); + + /* Process printing back trace */ + if (PrintBacktracePending) + LogBackTrace(); } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index ea28769..0cac8d2 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -37,6 +37,7 @@ volatile sig_atomic_t ProcSignalBarrierPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; +volatile sig_atomic_t PrintBacktracePending = false; int MyProcPid; pg_time_t MyStartTime; diff --git a/src/bin/pg_ctl/t/005_backtrace.pl b/src/bin/pg_ctl/t/005_backtrace.pl new file mode 100644 index 0000000..46e77d9 --- /dev/null +++ b/src/bin/pg_ctl/t/005_backtrace.pl @@ -0,0 +1,73 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 2; +use Time::HiRes qw(usleep); + +# Set up node with logging collector +my $node = get_new_node('primary'); +$node->init(); +$node->append_conf( + 'postgresql.conf', qq( +logging_collector = on +lc_messages = 'C' +)); + +$node->start(); + +# Verify that log output gets to the file +$node->psql('postgres', 'select pg_print_callstack(pg_backend_pid())'); + +# might need to retry if logging collector process is slow... +my $max_attempts = 180 * 10; + +my $current_logfiles; +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + eval { + $current_logfiles = slurp_file($node->data_dir . '/current_logfiles'); + }; + last unless $@; + usleep(100_000); +} +die $@ if $@; + +note "current_logfiles = $current_logfiles"; + +like( + $current_logfiles, + qr|^stderr log/postgresql-.*log$|, + 'current_logfiles is sane'); + +my $lfname = $current_logfiles; +$lfname =~ s/^stderr //; +chomp $lfname; + +my $first_logfile; +my $bt_occurence_count; + +# Verify that the backtraces of the processes are logged into logfile. +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + $first_logfile = $node->data_dir . '/' . $lfname; + chomp $first_logfile; + print "file is $first_logfile"; + open my $fh, '<', $first_logfile + or die "Could not open '$first_logfile' $!"; + while (my $line = <$fh>) + { + chomp $line; + if ($line =~ m/current backtrace/) + { + $bt_occurence_count++; + } + } + last if $bt_occurence_count == 1; + usleep(100_000); +} + +is($bt_occurence_count, 1, 'found expected back trace in the log file'); + +$node->stop(); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b5f52d4..4c5ec4e 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11375,4 +11375,9 @@ proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text', prosrc => 'unicode_is_normalized' }, +# function to get the callstack of server process +{ oid => '6105', descr => 'print callstack of process', + proname => 'pg_print_callstack', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4', prosrc => 'pg_print_callstack' }, + ] diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 1bdc97e..0d00ff9 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -92,6 +92,8 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount; extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount; extern PGDLLIMPORT volatile uint32 CritSectionCount; +extern PGDLLIMPORT volatile sig_atomic_t PrintBacktracePending; + /* in tcop/postgres.c */ extern void ProcessInterrupts(void); diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index dbbed18..803f663 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -42,6 +42,8 @@ typedef enum PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */ PMSIGNAL_ADVANCE_STATE_MACHINE, /* advance postmaster's state machine */ + PMSIGNAL_BACKTRACE_EMIT, /* send PROCSIG_BACKTRACE_PRINT to backend */ + NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 4ae7dc3..3348494 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -43,6 +43,8 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_BACKTRACE_PRINT, /* ask backend to print the current backtrace */ + NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; @@ -71,5 +73,6 @@ extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +extern bool EmitProcSignalPrintCallStack(int bt_pid); #endif /* PROCSIGNAL_H */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index e547210..0f8b74d 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -71,6 +71,7 @@ extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from S extern void ProcessClientReadInterrupt(bool blocked); extern void ProcessClientWriteInterrupt(bool blocked); +extern void LogBackTrace(void); /* Called from EmitProcSignalPrintCallStack */ extern void process_postgres_switches(int argc, char *argv[], GucContext ctx, const char **dbname); extern void PostgresMain(int argc, char *argv[], -- 1.8.3.1