Attached is a rebased version in light of 8aaa04b32d - Melanie
From 789d4bf1fb749a26523dbcd2c69795916b711c68 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Tue, 21 Mar 2023 16:00:55 -0400 Subject: [PATCH v8 1/4] Count IO time for temp relation writes
Both pgstat_database and pgBufferUsage write times failed to count timing for flushes of dirty local buffers when acquiring a new local buffer for a temporary relation block. --- src/backend/storage/buffer/localbuf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 6f9e7eda57..ecccb6c1a9 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -114,6 +114,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, LocalBufferLookupEnt *hresult; BufferDesc *bufHdr; int b; + instr_time io_start, + io_time; int trycounter; bool found; uint32 buf_state; @@ -220,6 +222,11 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + else + INSTR_TIME_SET_ZERO(io_start); + /* And write... */ smgrwrite(oreln, BufTagGetForkNum(&bufHdr->tag), @@ -233,6 +240,15 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, /* Temporary table I/O does not use Buffer Access Strategies */ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); + + if (track_io_timing) + { + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, io_start); + pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); + INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); + } + pgBufferUsage.local_blks_written++; } -- 2.37.2
From 726ab546a11707baa167106d5b6266451dfae445 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Tue, 21 Mar 2023 20:36:10 -0400 Subject: [PATCH v8 4/4] pgstat_database uses pgstat_io time counters Use pgstat_io's pending counters to increment pgStatBlockWriteTime and pgStatBlockReadTime. --- src/backend/utils/activity/pgstat_io.c | 14 ++++++++------ src/include/pgstat.h | 4 ---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index 4e98c4749a..905566decd 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -124,13 +124,11 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, if (io_op == IOOP_WRITE) { - pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); if (io_object == IOOBJECT_RELATION) INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); } else if (io_op == IOOP_READ) { - pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); if (io_object == IOOBJECT_RELATION) INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); } @@ -181,15 +179,19 @@ pgstat_flush_io(bool nowait) { for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) { - instr_time time; + PgStat_Counter time; bktype_shstats->counts[io_object][io_context][io_op] += PendingIOStats.counts[io_object][io_context][io_op]; - time = PendingIOStats.pending_times[io_object][io_context][io_op]; + time = INSTR_TIME_GET_MICROSEC(PendingIOStats.pending_times[io_object][io_context][io_op]); - bktype_shstats->times[io_object][io_context][io_op] += - INSTR_TIME_GET_MICROSEC(time); + bktype_shstats->times[io_object][io_context][io_op] += time; + + if (io_op == IOOP_WRITE) + pgStatBlockWriteTime += time; + else if (io_op == IOOP_READ) + pgStatBlockReadTime += time; } } } diff --git a/src/include/pgstat.h b/src/include/pgstat.h index bf54c6defe..833476a2bb 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -544,10 +544,6 @@ extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount); extern void pgstat_report_checksum_failure(void); extern void pgstat_report_connect(Oid dboid); -#define pgstat_count_buffer_read_time(n) \ - (pgStatBlockReadTime += (n)) -#define pgstat_count_buffer_write_time(n) \ - (pgStatBlockWriteTime += (n)) #define pgstat_count_conn_active_time(n) \ (pgStatActiveTime += (n)) #define pgstat_count_conn_txn_idle_time(n) \ -- 2.37.2
From 2bdad725133395ded199ecc726096e052d6e654b Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Fri, 31 Mar 2023 15:32:36 -0400 Subject: [PATCH v8 3/4] Track IO times in pg_stat_io Add IO timing for reads, writes, extends, and fsyncs to pg_stat_io. Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com> Reviewed-by: Andres Freund <and...@anarazel.de> Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com --- doc/src/sgml/monitoring.sgml | 59 ++++++++++ src/backend/catalog/system_views.sql | 4 + src/backend/storage/buffer/bufmgr.c | 67 ++++-------- src/backend/storage/buffer/localbuf.c | 21 +--- src/backend/storage/smgr/md.c | 31 ++++-- src/backend/utils/activity/pgstat_io.c | 143 +++++++++++++++++++++---- src/backend/utils/adt/pgstatfuncs.c | 48 ++++++++- src/include/catalog/pg_proc.dat | 6 +- src/include/pgstat.h | 7 +- src/test/regress/expected/rules.out | 6 +- 10 files changed, 287 insertions(+), 105 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index d5a45f996d..7b17d38887 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3814,6 +3814,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </entry> </row> + <row> + <entry role="catalog_table_entry"> + <para role="column_definition"> + <structfield>read_time</structfield> <type>double precision</type> + </para> + <para> + Time spent in read operations in milliseconds (if <xref + linkend="guc-track-io-timing"/> is enabled, otherwise zero) + </para> + </entry> + </row> + <row> <entry role="catalog_table_entry"> <para role="column_definition"> @@ -3826,6 +3838,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </entry> </row> + <row> + <entry role="catalog_table_entry"> + <para role="column_definition"> + <structfield>write_time</structfield> <type>double precision</type> + </para> + <para> + Time spent in write operations in milliseconds (if <xref + linkend="guc-track-io-timing"/> is enabled, otherwise zero) + </para> + </entry> + </row> + <row> <entry role="catalog_table_entry"> <para role="column_definition"> @@ -3838,6 +3862,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </entry> </row> + <row> + <entry role="catalog_table_entry"> + <para role="column_definition"> + <structfield>extend_time</structfield> <type>double precision</type> + </para> + <para> + Time spent in extend operations in milliseconds (if <xref + linkend="guc-track-io-timing"/> is enabled, otherwise zero) + </para> + </entry> + </row> + <row> <entry role="catalog_table_entry"> <para role="column_definition"> @@ -3913,6 +3949,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </entry> </row> + <row> + <entry role="catalog_table_entry"> + <para role="column_definition"> + <structfield>fsync_time</structfield> <type>double precision</type> + </para> + <para> + Time spent in fsync operations in milliseconds (if <xref + linkend="guc-track-io-timing"/> is enabled, otherwise zero) + </para> + </entry> + </row> + <row> <entry role="catalog_table_entry"> <para role="column_definition"> @@ -3978,6 +4026,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </itemizedlist> </para> + <note> + <para> + Columns tracking I/O time will only be non-zero when <xref + linkend="guc-track-io-timing"/> is enabled. The user should be careful when + using these columns in combination with their corresponding operations to + ensure that <varname>track_io_timing</varname> was enabled for the entire + time since the last reset. + </para> + </note> + + </sect2> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 574cbc2e44..ee2973c0a6 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1125,13 +1125,17 @@ SELECT b.io_object, b.io_context, b.reads, + b.read_time, b.writes, + b.write_time, b.extends, + b.extend_time, b.op_bytes, b.hits, b.evictions, b.reuses, b.fsyncs, + b.fsync_time, b.stats_reset FROM pg_stat_get_io() b; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 05e98d5994..ebceff3b11 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -995,12 +995,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isExtend) { + instr_time io_start; + /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); + + io_start = pgstat_prepare_io_time(); + /* don't set checksum for all-zero page */ smgrextend(smgr, forkNum, blockNum, bufBlock, false); - pgstat_count_io_op(io_object, io_context, IOOP_EXTEND); + pgstat_count_io_op_time(io_object, io_context, + IOOP_EXTEND, io_start); /* * NB: we're *not* doing a ScheduleBufferTagForWriteback here; @@ -1019,25 +1025,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, MemSet((char *) bufBlock, 0, BLCKSZ); else { - instr_time io_start, - io_time; + instr_time io_start; - if (track_io_timing) - INSTR_TIME_SET_CURRENT(io_start); - else - INSTR_TIME_SET_ZERO(io_start); + io_start = pgstat_prepare_io_time(); smgrread(smgr, forkNum, blockNum, bufBlock); - pgstat_count_io_op(io_object, io_context, IOOP_READ); - - if (track_io_timing) - { - INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_SUBTRACT(io_time, io_start); - pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); - INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); - } + pgstat_count_io_op_time(io_object, io_context, + IOOP_READ, io_start); /* check for garbage data */ if (!PageIsVerifiedExtended((Page) bufBlock, blockNum, @@ -2853,8 +2848,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, { XLogRecPtr recptr; ErrorContextCallback errcallback; - instr_time io_start, - io_time; + instr_time io_start; Block bufBlock; char *bufToWrite; uint32 buf_state; @@ -2929,10 +2923,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); - if (track_io_timing) - INSTR_TIME_SET_CURRENT(io_start); - else - INSTR_TIME_SET_ZERO(io_start); + io_start = pgstat_prepare_io_time(); /* * bufToWrite is either the shared buffer or a copy, as appropriate. @@ -2961,15 +2952,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * When a strategy is not in use, the write can only be a "regular" write * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE). */ - pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE); - - if (track_io_timing) - { - INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_SUBTRACT(io_time, io_start); - pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); - INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); - } + pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, + IOOP_WRITE, io_start); pgBufferUsage.shared_blks_written++; @@ -3571,14 +3555,13 @@ FlushRelationBuffers(Relation rel) { int i; BufferDesc *bufHdr; - instr_time io_start, - io_time; if (RelationUsesLocalBuffers(rel)) { for (i = 0; i < NLocBuffer; i++) { uint32 buf_state; + instr_time io_start; bufHdr = GetLocalBufferDescriptor(i); if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) && @@ -3598,10 +3581,7 @@ FlushRelationBuffers(Relation rel) PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); - if (track_io_timing) - INSTR_TIME_SET_CURRENT(io_start); - else - INSTR_TIME_SET_ZERO(io_start); + io_start = pgstat_prepare_io_time(); smgrwrite(RelationGetSmgr(rel), BufTagGetForkNum(&bufHdr->tag), @@ -3609,19 +3589,12 @@ FlushRelationBuffers(Relation rel) localpage, false); - buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); - pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); - - if (track_io_timing) - { - INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_SUBTRACT(io_time, io_start); - pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); - INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); - } + pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, + IOCONTEXT_NORMAL, IOOP_WRITE, + io_start); pgBufferUsage.local_blks_written++; diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index ecccb6c1a9..a70e8dc523 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -114,8 +114,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, LocalBufferLookupEnt *hresult; BufferDesc *bufHdr; int b; - instr_time io_start, - io_time; int trycounter; bool found; uint32 buf_state; @@ -214,6 +212,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, */ if (buf_state & BM_DIRTY) { + instr_time io_start; SMgrRelation oreln; Page localpage = (char *) LocalBufHdrGetBlock(bufHdr); @@ -222,10 +221,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); - if (track_io_timing) - INSTR_TIME_SET_CURRENT(io_start); - else - INSTR_TIME_SET_ZERO(io_start); + io_start = pgstat_prepare_io_time(); /* And write... */ smgrwrite(oreln, @@ -238,17 +234,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); - /* Temporary table I/O does not use Buffer Access Strategies */ - pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); - - if (track_io_timing) - { - INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_SUBTRACT(io_time, io_start); - pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); - INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); - } - + pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, + IOOP_WRITE, io_start); pgBufferUsage.local_blks_written++; } diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 352958e1fe..51a9134d57 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -1030,6 +1030,19 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) { + instr_time io_start; + + ereport(DEBUG1, + (errmsg_internal("could not forward fsync request because request queue is full"))); + + io_start = pgstat_prepare_io_time(); + + if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + FilePathName(seg->mdfd_vfd)))); + /* * We have no way of knowing if the current IOContext is * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this @@ -1041,16 +1054,8 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * IOCONTEXT_NORMAL is likely clearer when investigating the number of * backend fsyncs. */ - pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC); - - ereport(DEBUG1, - (errmsg_internal("could not forward fsync request because request queue is full"))); - - if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) - ereport(data_sync_elevel(ERROR), - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - FilePathName(seg->mdfd_vfd)))); + pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL, + IOOP_FSYNC, io_start); } } @@ -1399,6 +1404,7 @@ int mdsyncfiletag(const FileTag *ftag, char *path) { SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId); + instr_time io_start; File file; bool need_to_close; int result, @@ -1425,6 +1431,8 @@ mdsyncfiletag(const FileTag *ftag, char *path) need_to_close = true; } + io_start = pgstat_prepare_io_time(); + /* Sync the file. */ result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC); save_errno = errno; @@ -1432,7 +1440,8 @@ mdsyncfiletag(const FileTag *ftag, char *path) if (need_to_close) FileClose(file); - pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC); + pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL, + IOOP_FSYNC, io_start); errno = save_errno; return result; diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index ae8bb34f78..4e98c4749a 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -16,45 +16,65 @@ #include "postgres.h" +#include "executor/instrument.h" +#include "storage/bufmgr.h" #include "utils/pgstat_internal.h" -static PgStat_BktypeIO PendingIOStats; +typedef struct PgStat_PendingIO +{ + PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; + instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; +} PgStat_PendingIO; + + +static PgStat_PendingIO PendingIOStats; bool have_iostats = false; /* * Check that stats have not been counted for any combination of IOObject, - * IOContext, and IOOp which are not tracked for the passed-in BackendType. The - * passed-in PgStat_BktypeIO must contain stats from the BackendType specified - * by the second parameter. Caller is responsible for locking the passed-in - * PgStat_BktypeIO, if needed. + * IOContext, and IOOp which are not tracked for the passed-in BackendType. If + * the IOOp is not counted for this combination but IO time is otherwise + * tracked for this IOOp, check that IO time has not been counted for this + * combination. If stats are tracked for this combination and IO times are + * non-zero, counts should be non-zero. + * + * The passed-in PgStat_BktypeIO must contain stats from the BackendType + * specified by the second parameter. Caller is responsible for locking the + * passed-in PgStat_BktypeIO, if needed. */ bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, BackendType bktype) { - bool bktype_tracked = pgstat_tracks_io_bktype(bktype); - for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++) { - /* - * Don't bother trying to skip to the next loop iteration if - * pgstat_tracks_io_object() would return false here. We still - * need to validate that each counter is zero anyway. - */ for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) { - /* No stats, so nothing to validate */ - if (backend_io->data[io_object][io_context][io_op] == 0) + /* we do track it */ + if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op)) + { + /* ensure that if IO times are non-zero, counts are > 0 */ + if (backend_io->times[io_object][io_context][io_op] != 0 && + backend_io->counts[io_object][io_context][io_op] <= 0) + return false; + continue; + } - /* There are stats and there shouldn't be */ - if (!bktype_tracked || - !pgstat_tracks_io_op(bktype, io_object, io_context, io_op)) + /* we don't track it, and it is not 0 */ + if (backend_io->counts[io_object][io_context][io_op] != 0) return false; + + /* we don't track this IOOp, so make sure its IO time is zero */ + if (pgstat_tracks_io_time(io_op) > -1) + { + if (backend_io->times[io_object][io_context][io_op] != 0) + return false; + } } } } @@ -62,6 +82,19 @@ pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, return true; } +instr_time +pgstat_prepare_io_time(void) +{ + instr_time io_start; + + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + else + INSTR_TIME_SET_ZERO(io_start); + + return io_start; +} + void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) { @@ -70,11 +103,44 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) Assert((unsigned int) io_op < IOOP_NUM_TYPES); Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); - PendingIOStats.data[io_object][io_context][io_op]++; + PendingIOStats.counts[io_object][io_context][io_op]++; have_iostats = true; } +/* + * Like pgstat_count_io_op() except it also accumulates time. + */ +void +pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, + instr_time start_time) +{ + if (track_io_timing) + { + instr_time io_time; + + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, start_time); + + if (io_op == IOOP_WRITE) + { + pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); + if (io_object == IOOBJECT_RELATION) + INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); + } + else if (io_op == IOOP_READ) + { + pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); + if (io_object == IOOBJECT_RELATION) + INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); + } + + INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], io_time); + } + + pgstat_count_io_op(io_object, io_context, io_op); +} + PgStat_IO * pgstat_fetch_stat_io(void) { @@ -114,8 +180,17 @@ pgstat_flush_io(bool nowait) for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++) { for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) - bktype_shstats->data[io_object][io_context][io_op] += - PendingIOStats.data[io_object][io_context][io_op]; + { + instr_time time; + + bktype_shstats->counts[io_object][io_context][io_op] += + PendingIOStats.counts[io_object][io_context][io_op]; + + time = PendingIOStats.pending_times[io_object][io_context][io_op]; + + bktype_shstats->times[io_object][io_context][io_op] += + INSTR_TIME_GET_MICROSEC(time); + } } } @@ -384,3 +459,31 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object, return true; } + +/* + * PgStat_BktypeIO->times contains IO times for IOOps. For simplicity this + * array has a spot for every IOOp. pgstat_tracks_io_time() is the source of + * truth for which IOOps have corresponding IO times. + */ +IOOp +pgstat_tracks_io_time(IOOp io_op) +{ + switch (io_op) + { + case IOOP_READ: + return IOOP_READ; + case IOOP_WRITE: + return IOOP_WRITE; + case IOOP_EXTEND: + return IOOP_EXTEND; + case IOOP_FSYNC: + return IOOP_FSYNC; + case IOOP_EVICT: + case IOOP_HIT: + case IOOP_REUSE: + return -1; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index eec9f3cf9b..60382ac76a 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1256,13 +1256,17 @@ typedef enum io_stat_col IO_COL_IO_OBJECT, IO_COL_IO_CONTEXT, IO_COL_READS, + IO_COL_READ_TIME, IO_COL_WRITES, + IO_COL_WRITE_TIME, IO_COL_EXTENDS, + IO_COL_EXTEND_TIME, IO_COL_CONVERSION, IO_COL_HITS, IO_COL_EVICTIONS, IO_COL_REUSES, IO_COL_FSYNCS, + IO_COL_FSYNC_TIME, IO_COL_RESET_TIME, IO_NUM_COLUMNS, } io_stat_col; @@ -1296,6 +1300,28 @@ pgstat_get_io_op_index(IOOp io_op) pg_unreachable(); } +/* + * Get the number of the column containing IO times for the specified IOOp. If + * the specified IOOp is one for which IO time is not tracked, return -1. Note + * that this function assumes that IO time for an IOOp is displayed in the view + * in the column directly after the IOOp counts. + */ +static io_stat_col +pgstat_get_io_time_index(IOOp io_op) +{ + if (pgstat_tracks_io_time(io_op) == -1) + return -1; + + return pgstat_get_io_op_index(io_op) + 1; +} + +static inline +PgStat_Counter +pg_stat_micro_to_millisecs(PgStat_Counter val_microsec) +{ + return val_microsec * 0.001; +} + Datum pg_stat_get_io(PG_FUNCTION_ARGS) { @@ -1363,20 +1389,32 @@ pg_stat_get_io(PG_FUNCTION_ARGS) for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) { - int col_idx = pgstat_get_io_op_index(io_op); + PgStat_Counter count = bktype_stats->counts[io_obj][io_context][io_op]; + int i = pgstat_get_io_op_index(io_op); /* * Some combinations of BackendType and IOOp, of IOContext * and IOOp, and of IOObject and IOOp are not tracked. Set * these cells in the view NULL. */ - nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op); + if (pgstat_tracks_io_op(bktype, io_obj, io_context, io_op)) + values[i] = Int64GetDatum(count); + else + nulls[i] = true; + } - if (nulls[col_idx]) + for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) + { + PgStat_Counter time = bktype_stats->times[io_obj][io_context][io_op]; + int i = pgstat_get_io_time_index(io_op); + + if (i == -1) continue; - values[col_idx] = - Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]); + if (!nulls[pgstat_get_io_op_index(io_op)]) + values[i] = Float8GetDatum(pg_stat_micro_to_millisecs(time)); + else + nulls[i] = true; } tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index f9f2642201..664a7ab41d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5749,9 +5749,9 @@ proname => 'pg_stat_get_io', provolatile => 'v', prorows => '30', proretset => 't', proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}', + proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_object,io_context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}', prosrc => 'pg_stat_get_io' }, { oid => '1136', descr => 'statistics: information about WAL activity', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 75d258d921..bf54c6defe 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -306,7 +306,8 @@ typedef enum IOOp typedef struct PgStat_BktypeIO { - PgStat_Counter data[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; + PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; + PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; } PgStat_BktypeIO; typedef struct PgStat_IO @@ -515,7 +516,10 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops, BackendType bktype); +extern instr_time pgstat_prepare_io_time(void); extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op); +extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time time); + extern PgStat_IO *pgstat_fetch_stat_io(void); extern const char *pgstat_get_io_context_name(IOContext io_context); extern const char *pgstat_get_io_object_name(IOObject io_object); @@ -525,6 +529,7 @@ extern bool pgstat_tracks_io_object(BackendType bktype, IOObject io_object, IOContext io_context); extern bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, IOContext io_context, IOOp io_op); +extern IOOp pgstat_tracks_io_time(IOOp io_op); /* diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index ab1aebfde4..d939d8067e 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1881,15 +1881,19 @@ pg_stat_io| SELECT backend_type, io_object, io_context, reads, + read_time, writes, + write_time, extends, + extend_time, op_bytes, hits, evictions, reuses, fsyncs, + fsync_time, stats_reset - FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset); + FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, -- 2.37.2
From f4e0db5c833f33b30d4c0b4bebec1096a1745d81 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Tue, 21 Mar 2023 18:20:44 -0400 Subject: [PATCH v8 2/4] FlushRelationBuffers() counts temp relation IO timing Add pgstat_database and pgBufferUsage IO timing counting to FlushRelationBuffers() for writes of temporary relations. --- src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b3adbbe7d2..05e98d5994 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3571,6 +3571,8 @@ FlushRelationBuffers(Relation rel) { int i; BufferDesc *bufHdr; + instr_time io_start, + io_time; if (RelationUsesLocalBuffers(rel)) { @@ -3596,17 +3598,33 @@ FlushRelationBuffers(Relation rel) PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + else + INSTR_TIME_SET_ZERO(io_start); + smgrwrite(RelationGetSmgr(rel), BufTagGetForkNum(&bufHdr->tag), bufHdr->tag.blockNum, localpage, false); + buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); + if (track_io_timing) + { + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, io_start); + pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); + INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); + } + + pgBufferUsage.local_blks_written++; + /* Pop the error context stack */ error_context_stack = errcallback.previous; } -- 2.37.2