On 1 July 2018 at 11:29, Michael Paquier <mich...@paquier.xyz> wrote:
> > So at the end, I would like to use the proposed patch and call it a > day. Thoughts? > > Patch looks good. I'll hijack the thread to add a few more perf/dtrace tracepoints in the WAL code, as they were also missing. Proposed rider patch attached. I've updated https://wiki.postgresql.org/wiki/Profiling_with_perf#PostgreSQL_pre-defined_tracepoint_events to document how to add tracepoints. It's not that hard to trace duration of a given function call with dtrace without such annotations, but they make it much easier to discover where in a large codebase to look, providing a form of documentation. With perf they make life much easier. I should add some more to make it easier to analyse relation extension contention on indexes and the heap, instrument btree index ops like page splits, instrument heavyweight locking (beyond LOCK_WAIT_START), etc. They're also handy for gdb - https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html - also handy for gdb's "info probes". BTW, we might want to instrument the pgstat_ counter calls and pgstat_report_wait_start / pgstat_report_wait_end, but it's easy enough to use dynamic tracepoints for those so I haven't yet. Maybe even just document them as points of interest. -- Craig Ringer http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
From 74cfc819e2c4323613191bcbf6c91f128617c353 Mon Sep 17 00:00:00 2001 From: Craig Ringer <cr...@2ndquadrant.com> Date: Mon, 2 Jul 2018 11:33:46 +0800 Subject: [PATCH v1] Add more statically defined tracepoints around xlog handling New tracepoints are added to allow for tracing of WAL flushes and segment initialization/recycling. The existing wal_insert tracepoint is renamed to wal_insert_start and paired with a wal_insert_done, so that insert durations may be recorded including fsync time. --- src/backend/access/transam/xlog.c | 14 ++++++++++++++ src/backend/access/transam/xloginsert.c | 4 +++- src/backend/utils/probes.d | 18 +++++++++++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index dcfef36591..b3dead5ac0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -955,6 +955,8 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); * (LSN is the XLOG point up to which the XLOG must be flushed to disk * before the data page can be written out. This implements the basic * WAL rule "write the log before the data".) + * + * You want the WAL_INSERT tracepoints if you're looking for activity here. */ XLogRecPtr XLogInsertRecord(XLogRecData *rdata, @@ -2810,6 +2812,8 @@ XLogFlush(XLogRecPtr record) (uint32) (LogwrtResult.Flush >> 32), (uint32) LogwrtResult.Flush); #endif + TRACE_POSTGRESQL_WAL_FLUSH_START(record); + START_CRIT_SECTION(); /* @@ -2942,6 +2946,8 @@ XLogFlush(XLogRecPtr record) "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X", (uint32) (record >> 32), (uint32) record, (uint32) (LogwrtResult.Flush >> 32), (uint32) LogwrtResult.Flush); + + TRACE_POSTGRESQL_WAL_FLUSH_DONE(); } /* @@ -3190,6 +3196,8 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); + TRACE_POSTGRESQL_WAL_SEG_INIT_START(); + /* * Try to use existent file (checkpoint maker may have created it already) */ @@ -3204,7 +3212,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) errmsg("could not open file \"%s\": %m", path))); } else + { + TRACE_POSTGRESQL_WAL_SEG_INIT_DONE(*use_existent); return fd; + } } /* @@ -3327,6 +3338,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) elog(DEBUG2, "done creating and filling new WAL file"); + TRACE_POSTGRESQL_WAL_SEG_INIT_DONE(*use_existent); return fd; } @@ -10156,6 +10168,7 @@ assign_xlog_sync_method(int new_sync_method, void *extra) void issue_xlog_fsync(int fd, XLogSegNo segno) { + TRACE_POSTGRESQL_WAL_SYNC_START(fd, segno); switch (sync_method) { case SYNC_METHOD_FSYNC: @@ -10191,6 +10204,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno) elog(PANIC, "unrecognized wal_sync_method: %d", sync_method); break; } + TRACE_POSTGRESQL_WAL_SYNC_DONE(); } /* diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 5bea073a2b..8c9b57b0da 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -429,7 +429,7 @@ XLogInsert(RmgrId rmid, uint8 info) XLR_CHECK_CONSISTENCY)) != 0) elog(PANIC, "invalid xlog info mask %02X", info); - TRACE_POSTGRESQL_WAL_INSERT(rmid, info); + TRACE_POSTGRESQL_WAL_INSERT_START(rmid, info); /* * In bootstrap mode, we don't actually log anything but XLOG resources; @@ -464,6 +464,8 @@ XLogInsert(RmgrId rmid, uint8 info) XLogResetInsertion(); + TRACE_POSTGRESQL_WAL_INSERT_DONE(EndPos); + return EndPos; } diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index ad06e8e2ea..2be68d99d6 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -21,6 +21,14 @@ #define Oid unsigned int #define ForkNumber int #define bool char +/* + * SystemTap doesn't like 'unsigned long long int', but except on Windows 'long + * int' is 64-bit. Windows is LLP64 and will treat it as 32-bit but our stub + * macros don't care about typing, and dtrace/stap isn't supported on Windows. + * Windows has Event Tracing (ETW) and TraceLogging, but we don't support it right + * now, so we'll deal with it if it comes up. + */ +#define XLogRecPtr unsigned long int provider postgresql { @@ -75,6 +83,7 @@ provider postgresql { probe checkpoint__done(int, int, int, int, int); probe clog__checkpoint__start(bool); probe clog__checkpoint__done(bool); + probe subtrans__checkpoint__start(bool); probe subtrans__checkpoint__done(bool); probe multixact__checkpoint__start(bool); @@ -87,7 +96,14 @@ provider postgresql { probe smgr__md__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); probe smgr__md__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); - probe wal__insert(unsigned char, unsigned char); + probe wal__insert__start(unsigned char, unsigned char); + probe wal__insert__done(XLogRecPtr); + probe wal__seg__init__start(); + probe wal__seg__init__done(bool); + probe wal__flush__start(XLogRecPtr); + probe wal__flush__done(); + probe wal__sync__start(int, int); + probe wal__sync__done(); probe wal__switch(); probe wal__buffer__write__dirty__start(); probe wal__buffer__write__dirty__done(); -- 2.14.3