Hi,

On 2023-03-31 15:44:58 -0400, Melanie Plageman wrote:
> From 789d4bf1fb749a26523dbcd2c69795916b711c68 Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Tue, 21 Mar 2023 16:00:55 -0400
> Subject: [PATCH v8 1/4] Count IO time for temp relation writes
>
> Both pgstat_database and pgBufferUsage write times failed to count
> timing for flushes of dirty local buffers when acquiring a new local
> buffer for a temporary relation block.

I think it'd be worth mentioning here that we do count read time? Otherwise
it'd not be as clear that adding tracking increases consistency...



> From f4e0db5c833f33b30d4c0b4bebec1096a1745d81 Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Tue, 21 Mar 2023 18:20:44 -0400
> Subject: [PATCH v8 2/4] FlushRelationBuffers() counts temp relation IO timing
>
> Add pgstat_database and pgBufferUsage IO timing counting to
> FlushRelationBuffers() for writes of temporary relations.
> ---
>  src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/backend/storage/buffer/bufmgr.c 
> b/src/backend/storage/buffer/bufmgr.c
> index b3adbbe7d2..05e98d5994 100644
> --- a/src/backend/storage/buffer/bufmgr.c
> +++ b/src/backend/storage/buffer/bufmgr.c
> @@ -3571,6 +3571,8 @@ FlushRelationBuffers(Relation rel)
>  {
>       int                     i;
>       BufferDesc *bufHdr;
> +     instr_time      io_start,
> +                             io_time;
>
>       if (RelationUsesLocalBuffers(rel))
>       {
> @@ -3596,17 +3598,33 @@ FlushRelationBuffers(Relation rel)
>
>                               PageSetChecksumInplace(localpage, 
> bufHdr->tag.blockNum);
>
> +                             if (track_io_timing)
> +                                     INSTR_TIME_SET_CURRENT(io_start);
> +                             else
> +                                     INSTR_TIME_SET_ZERO(io_start);
> +
>                               smgrwrite(RelationGetSmgr(rel),
>                                                 
> BufTagGetForkNum(&bufHdr->tag),
>                                                 bufHdr->tag.blockNum,
>                                                 localpage,
>                                                 false);
>
> +

Spurious newline.


>                               buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
>                               pg_atomic_unlocked_write_u32(&bufHdr->state, 
> buf_state);
>
>                               pgstat_count_io_op(IOOBJECT_TEMP_RELATION, 
> IOCONTEXT_NORMAL, IOOP_WRITE);
>
> +                             if (track_io_timing)
> +                             {
> +                                     INSTR_TIME_SET_CURRENT(io_time);
> +                                     INSTR_TIME_SUBTRACT(io_time, io_start);
> +                                     
> pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
> +                                     
> INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
> +                             }
> +
> +                             pgBufferUsage.local_blks_written++;
> +
>                               /* Pop the error context stack */
>                               error_context_stack = errcallback.previous;
>                       }
> --
> 2.37.2
>


> From 2bdad725133395ded199ecc726096e052d6e654b Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Fri, 31 Mar 2023 15:32:36 -0400
> Subject: [PATCH v8 3/4] Track IO times in pg_stat_io
>
> Add IO timing for reads, writes, extends, and fsyncs to pg_stat_io.
>
> Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com>
> Reviewed-by: Andres Freund <and...@anarazel.de>
> Discussion: 
> https://www.postgresql.org/message-id/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
> ---

> -static PgStat_BktypeIO PendingIOStats;
> +typedef struct PgStat_PendingIO
> +{
> +     PgStat_Counter 
> counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
> +     instr_time      
> pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
> +}                    PgStat_PendingIO;

Probably will look less awful after adding the typedef to typedefs.list.


> +                             /* we do track it */
> +                             if (pgstat_tracks_io_op(bktype, io_object, 
> io_context, io_op))
> +                             {
> +                                     /* ensure that if IO times are 
> non-zero, counts are > 0 */
> +                                     if 
> (backend_io->times[io_object][io_context][io_op] != 0 &&
> +                                             
> backend_io->counts[io_object][io_context][io_op] <= 0)
> +                                             return false;
> +
>                                       continue;
> +                             }
>
> -                             /* There are stats and there shouldn't be */
> -                             if (!bktype_tracked ||
> -                                     !pgstat_tracks_io_op(bktype, io_object, 
> io_context, io_op))
> +                             /* we don't track it, and it is not 0 */
> +                             if 
> (backend_io->counts[io_object][io_context][io_op] != 0)
>                                       return false;
> +
> +                             /* we don't track this IOOp, so make sure its 
> IO time is zero */
> +                             if (pgstat_tracks_io_time(io_op) > -1)
> +                             {
> +                                     if 
> (backend_io->times[io_object][io_context][io_op] != 0)
> +                                             return false;
> +                             }

I'm somehow doubtful it's worth having pgstat_tracks_io_time, what kind of
error would be caught by this check?


> +/*
> + * Get the number of the column containing IO times for the specified IOOp. 
> If
> + * the specified IOOp is one for which IO time is not tracked, return -1. 
> Note
> + * that this function assumes that IO time for an IOOp is displayed in the 
> view
> + * in the column directly after the IOOp counts.
> + */
> +static io_stat_col
> +pgstat_get_io_time_index(IOOp io_op)
> +{
> +     if (pgstat_tracks_io_time(io_op) == -1)
> +             return -1;

That seems dangerous - won't it just lead to accessing something from before
the start of the array? Probably should just assert.



> @@ -1363,20 +1389,32 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
>
>                               for (int io_op = 0; io_op < IOOP_NUM_TYPES; 
> io_op++)
>                               {
> -                                     int                     col_idx = 
> pgstat_get_io_op_index(io_op);
> +                                     PgStat_Counter count = 
> bktype_stats->counts[io_obj][io_context][io_op];
> +                                     int                     i = 
> pgstat_get_io_op_index(io_op);
>
>                                       /*
>                                        * Some combinations of BackendType and 
> IOOp, of IOContext
>                                        * and IOOp, and of IOObject and IOOp 
> are not tracked. Set
>                                        * these cells in the view NULL.
>                                        */
> -                                     nulls[col_idx] = 
> !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op);
> +                                     if (pgstat_tracks_io_op(bktype, io_obj, 
> io_context, io_op))
> +                                             values[i] = 
> Int64GetDatum(count);
> +                                     else
> +                                             nulls[i] = true;
> +                             }
>
> -                                     if (nulls[col_idx])
> +                             for (int io_op = 0; io_op < IOOP_NUM_TYPES; 
> io_op++)
> +                             {
> +                                     PgStat_Counter time = 
> bktype_stats->times[io_obj][io_context][io_op];
> +                                     int                     i = 
> pgstat_get_io_time_index(io_op);
> +
> +                                     if (i == -1)
>                                               continue;
>
> -                                     values[col_idx] =
> -                                             
> Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]);
> +                                     if 
> (!nulls[pgstat_get_io_op_index(io_op)])
> +                                             values[i] = 
> Float8GetDatum(pg_stat_micro_to_millisecs(time));
> +                                     else
> +                                             nulls[i] = true;
>                               }

Why two loops?

Greetings,

Andres Freund


Reply via email to