Hi, On 2023-03-31 15:44:58 -0400, Melanie Plageman wrote: > From 789d4bf1fb749a26523dbcd2c69795916b711c68 Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplage...@gmail.com> > Date: Tue, 21 Mar 2023 16:00:55 -0400 > Subject: [PATCH v8 1/4] Count IO time for temp relation writes > > Both pgstat_database and pgBufferUsage write times failed to count > timing for flushes of dirty local buffers when acquiring a new local > buffer for a temporary relation block.
I think it'd be worth mentioning here that we do count read time? Otherwise it'd not be as clear that adding tracking increases consistency... > From f4e0db5c833f33b30d4c0b4bebec1096a1745d81 Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplage...@gmail.com> > Date: Tue, 21 Mar 2023 18:20:44 -0400 > Subject: [PATCH v8 2/4] FlushRelationBuffers() counts temp relation IO timing > > Add pgstat_database and pgBufferUsage IO timing counting to > FlushRelationBuffers() for writes of temporary relations. > --- > src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++ > 1 file changed, 18 insertions(+) > > diff --git a/src/backend/storage/buffer/bufmgr.c > b/src/backend/storage/buffer/bufmgr.c > index b3adbbe7d2..05e98d5994 100644 > --- a/src/backend/storage/buffer/bufmgr.c > +++ b/src/backend/storage/buffer/bufmgr.c > @@ -3571,6 +3571,8 @@ FlushRelationBuffers(Relation rel) > { > int i; > BufferDesc *bufHdr; > + instr_time io_start, > + io_time; > > if (RelationUsesLocalBuffers(rel)) > { > @@ -3596,17 +3598,33 @@ FlushRelationBuffers(Relation rel) > > PageSetChecksumInplace(localpage, > bufHdr->tag.blockNum); > > + if (track_io_timing) > + INSTR_TIME_SET_CURRENT(io_start); > + else > + INSTR_TIME_SET_ZERO(io_start); > + > smgrwrite(RelationGetSmgr(rel), > > BufTagGetForkNum(&bufHdr->tag), > bufHdr->tag.blockNum, > localpage, > false); > > + Spurious newline. > buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); > pg_atomic_unlocked_write_u32(&bufHdr->state, > buf_state); > > pgstat_count_io_op(IOOBJECT_TEMP_RELATION, > IOCONTEXT_NORMAL, IOOP_WRITE); > > + if (track_io_timing) > + { > + INSTR_TIME_SET_CURRENT(io_time); > + INSTR_TIME_SUBTRACT(io_time, io_start); > + > pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); > + > INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); > + } > + > + pgBufferUsage.local_blks_written++; > + > /* Pop the error context stack */ > error_context_stack = errcallback.previous; > } > -- > 2.37.2 > > From 2bdad725133395ded199ecc726096e052d6e654b Mon Sep 17 00:00:00 2001 > From: Melanie Plageman <melanieplage...@gmail.com> > Date: Fri, 31 Mar 2023 15:32:36 -0400 > Subject: [PATCH v8 3/4] Track IO times in pg_stat_io > > Add IO timing for reads, writes, extends, and fsyncs to pg_stat_io. > > Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com> > Reviewed-by: Andres Freund <and...@anarazel.de> > Discussion: > https://www.postgresql.org/message-id/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com > --- > -static PgStat_BktypeIO PendingIOStats; > +typedef struct PgStat_PendingIO > +{ > + PgStat_Counter > counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; > + instr_time > pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; > +} PgStat_PendingIO; Probably will look less awful after adding the typedef to typedefs.list. > + /* we do track it */ > + if (pgstat_tracks_io_op(bktype, io_object, > io_context, io_op)) > + { > + /* ensure that if IO times are > non-zero, counts are > 0 */ > + if > (backend_io->times[io_object][io_context][io_op] != 0 && > + > backend_io->counts[io_object][io_context][io_op] <= 0) > + return false; > + > continue; > + } > > - /* There are stats and there shouldn't be */ > - if (!bktype_tracked || > - !pgstat_tracks_io_op(bktype, io_object, > io_context, io_op)) > + /* we don't track it, and it is not 0 */ > + if > (backend_io->counts[io_object][io_context][io_op] != 0) > return false; > + > + /* we don't track this IOOp, so make sure its > IO time is zero */ > + if (pgstat_tracks_io_time(io_op) > -1) > + { > + if > (backend_io->times[io_object][io_context][io_op] != 0) > + return false; > + } I'm somehow doubtful it's worth having pgstat_tracks_io_time, what kind of error would be caught by this check? > +/* > + * Get the number of the column containing IO times for the specified IOOp. > If > + * the specified IOOp is one for which IO time is not tracked, return -1. > Note > + * that this function assumes that IO time for an IOOp is displayed in the > view > + * in the column directly after the IOOp counts. > + */ > +static io_stat_col > +pgstat_get_io_time_index(IOOp io_op) > +{ > + if (pgstat_tracks_io_time(io_op) == -1) > + return -1; That seems dangerous - won't it just lead to accessing something from before the start of the array? Probably should just assert. > @@ -1363,20 +1389,32 @@ pg_stat_get_io(PG_FUNCTION_ARGS) > > for (int io_op = 0; io_op < IOOP_NUM_TYPES; > io_op++) > { > - int col_idx = > pgstat_get_io_op_index(io_op); > + PgStat_Counter count = > bktype_stats->counts[io_obj][io_context][io_op]; > + int i = > pgstat_get_io_op_index(io_op); > > /* > * Some combinations of BackendType and > IOOp, of IOContext > * and IOOp, and of IOObject and IOOp > are not tracked. Set > * these cells in the view NULL. > */ > - nulls[col_idx] = > !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op); > + if (pgstat_tracks_io_op(bktype, io_obj, > io_context, io_op)) > + values[i] = > Int64GetDatum(count); > + else > + nulls[i] = true; > + } > > - if (nulls[col_idx]) > + for (int io_op = 0; io_op < IOOP_NUM_TYPES; > io_op++) > + { > + PgStat_Counter time = > bktype_stats->times[io_obj][io_context][io_op]; > + int i = > pgstat_get_io_time_index(io_op); > + > + if (i == -1) > continue; > > - values[col_idx] = > - > Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]); > + if > (!nulls[pgstat_get_io_op_index(io_op)]) > + values[i] = > Float8GetDatum(pg_stat_micro_to_millisecs(time)); > + else > + nulls[i] = true; > } Why two loops? Greetings, Andres Freund