Hi,
In 'instr_time.h' it is stated that:
* When summing multiple measurements, it's recommended to leave the
* running sum in instr_time form (ie, use INSTR_TIME_ADD or
* INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.
So, I refactored 'PendingWalStats' to use 'instr_time' instead of
'PgStat_Counter' while accumulating 'wal_write_time' and
'wal_sync_time'. Also, I refactored some calculations to use
'INSTR_TIME_ACCUM_DIFF' instead of using 'INSTR_TIME_SUBTRACT' and
'INSTR_TIME_ADD'. What do you think?
Regards,
Nazir Bilal Yavuz
Microsoft
From 5216c70da53ff32b8e9898595445ad6f4880b06d Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Thu, 2 Feb 2023 15:06:48 +0300
Subject: [PATCH v1] Refactor instr_time calculations
---
src/backend/access/transam/xlog.c | 6 ++----
src/backend/storage/file/buffile.c | 6 ++----
src/backend/utils/activity/pgstat_wal.c | 27 ++++++++++++-------------
src/include/pgstat.h | 17 +++++++++++++++-
4 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d1..3c35dc1ca23 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2206,8 +2206,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
instr_time duration;
INSTR_TIME_SET_CURRENT(duration);
- INSTR_TIME_SUBTRACT(duration, start);
- PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+ INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, duration, start);
}
PendingWalStats.wal_write++;
@@ -8201,8 +8200,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
instr_time duration;
INSTR_TIME_SET_CURRENT(duration);
- INSTR_TIME_SUBTRACT(duration, start);
- PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+ INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, duration, start);
}
PendingWalStats.wal_sync++;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 0a51624df3b..e55f86b675e 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -469,8 +469,7 @@ BufFileLoadBuffer(BufFile *file)
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
- INSTR_TIME_SUBTRACT(io_time, io_start);
- INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+ INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
}
/* we choose not to advance curOffset here */
@@ -544,8 +543,7 @@ BufFileDumpBuffer(BufFile *file)
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
- INSTR_TIME_SUBTRACT(io_time, io_start);
- INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+ INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
}
file->curOffset += bytestowrite;
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e8598b2f4e0..56211fbc61a 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -21,7 +21,7 @@
#include "executor/instrument.h"
-PgStat_WalStats PendingWalStats = {0};
+PgStat_PendingWalUsage PendingWalStats = {0};
/*
* WAL usage counters saved from pgWALUsage at the previous call to
@@ -89,26 +89,25 @@ pgstat_flush_wal(bool nowait)
* previous counters from the current ones.
*/
WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
- PendingWalStats.wal_records = diff.wal_records;
- PendingWalStats.wal_fpi = diff.wal_fpi;
- PendingWalStats.wal_bytes = diff.wal_bytes;
if (!nowait)
LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
return true;
-#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
- WALSTAT_ACC(wal_records);
- WALSTAT_ACC(wal_fpi);
- WALSTAT_ACC(wal_bytes);
- WALSTAT_ACC(wal_buffers_full);
- WALSTAT_ACC(wal_write);
- WALSTAT_ACC(wal_sync);
- WALSTAT_ACC(wal_write_time);
- WALSTAT_ACC(wal_sync_time);
+#define WALSTAT_ACC(fld, var_to_add) \
+ (stats_shmem->stats.fld += var_to_add.fld)
+#define WALLSTAT_ACC_INSTR_TIME_TYPE(fld) \
+ (stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld))
+ WALSTAT_ACC(wal_records, diff);
+ WALSTAT_ACC(wal_fpi, diff);
+ WALSTAT_ACC(wal_bytes, diff);
+ WALSTAT_ACC(wal_buffers_full, PendingWalStats);
+ WALSTAT_ACC(wal_write, PendingWalStats);
+ WALSTAT_ACC(wal_sync, PendingWalStats);
+ WALLSTAT_ACC_INSTR_TIME_TYPE(wal_write_time);
+ WALLSTAT_ACC_INSTR_TIME_TYPE(wal_sync_time);
#undef WALSTAT_ACC
-
LWLockRelease(&stats_shmem->lock);
/*
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index db9675884f3..295c5eabf38 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -445,6 +445,21 @@ typedef struct PgStat_WalStats
TimestampTz stat_reset_timestamp;
} PgStat_WalStats;
+/*
+ * Pending WAL statistics. wal_write_time and wal_sync_time are accumulated
+ * as instr_time, which cannot be stored in PgStat_WalStats: that type is
+ * used for in-memory/on-disk data and instr_time's units may change. They
+ * are converted to microseconds when added to the shared stats.
+ */
+typedef struct PgStat_PendingWalUsage
+{
+ PgStat_Counter wal_buffers_full;
+ PgStat_Counter wal_write;
+ PgStat_Counter wal_sync;
+ instr_time wal_write_time;
+ instr_time wal_sync_time;
+} PgStat_PendingWalUsage;
+
/*
* Functions in pgstat.c
@@ -758,7 +773,7 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
*/
/* updated directly by backends and background processes */
-extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
+extern PGDLLIMPORT PgStat_PendingWalUsage PendingWalStats;
#endif /* PGSTAT_H */
--
2.39.1