Hello,

thank you much for your attention and for your thought.

On 20.03.2022 20:36, Andres Freund wrote:
This patch introduces an additional counter of wal records not related to
the query being executed.

They're not unrelated though.

Yes, i've missformulated here.
Indeed there is a relation but it seems of the some other kind.
It would be nice to clarify the terminology.
Maybe divide WAL records into two kinds:
1) WAL records, the number of which depends on the given query itself. (say strong relation) 2) WAL records, the number of which depends on the given query and on the previous query history. (say weak relation)

So modified in the patch wal_records counter will belongs to the first kind while the number of wal records due to on-access pruning and new clog page generation to the second.

-many. For read-only queries the generated WAL due to on-access pruning can be
a significant factor in performance. Removing that information makes
pg_stat_statments *less* useful.

A separate counter for the second type of records, say, extra_wal_records, will not only remove this disadvantage, but on the contrary will provide additional information.

The next version of the patch with additional counter is attached.

Really, now it is clearly seen that sometimes
WAL due to on-access pruning can be a significant factor !
After  pgbench -c10 -t300:
postgres=# SELECT substring(query for 30), wal_records, extra_wal_records FROM pg_stat_statements WHERE extra_wal_records != 0;

           substring            | wal_records | extra_wal_records
--------------------------------+-------------+-------------------
 UPDATE pgbench_tellers SET tba |        4557 |                15
 create table pgbench_history(t |          48 |                 1
 create table pgbench_branches( |          40 |                 1
 UPDATE pgbench_accounts SET ab |        5868 |              1567
 drop table if exists pgbench_a |          94 |                 1
 UPDATE pgbench_branches SET bb |        5993 |                14
 SELECT abalance FROM pgbench_a |           0 |                 7
(7 rows)

Can the test failures be encountered without such an elaborate setup? If not,
then I don't really see why we need to do anything here?

There was a real bug report from our test department. They do long time repetitive tests and sometimes met this failure. So i suppose there is a non-zero probability that such error can occur in the one-shot test as well.
The sequence given in the first letter helps to catch this failure quickly.

With best regards,

--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
commit 2cc4234754fc815528ed299b64c1a3f1e1991617
Author: Anton A. Melnikov <a.melni...@postgrespro.ru>
Date:   Wed Mar 30 08:54:51 2022 +0300

    Fix possible fails in pg_stat_statements test via taking into account WAL records due to on-access pruning and new clog page generation.

diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 7fabd96f38..145b2617d7 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -6,11 +6,12 @@ OBJS = \
 	pg_stat_statements.o
 
 EXTENSION = pg_stat_statements
-DATA = pg_stat_statements--1.4.sql pg_stat_statements--1.8--1.9.sql \
-	pg_stat_statements--1.7--1.8.sql pg_stat_statements--1.6--1.7.sql \
-	pg_stat_statements--1.5--1.6.sql pg_stat_statements--1.4--1.5.sql \
-	pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.2--1.3.sql \
-	pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.0--1.1.sql
+DATA = pg_stat_statements--1.4.sql pg_stat_statements--1.9--1.10.sql  \
+	pg_stat_statements--1.8--1.9.sql pg_stat_statements--1.7--1.8.sql \
+	pg_stat_statements--1.6--1.7.sql pg_stat_statements--1.5--1.6.sql \
+	pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.3--1.4.sql \
+	pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.1--1.2.sql \
+	pg_stat_statements--1.0--1.1.sql
 PGFILEDESC = "pg_stat_statements - execution statistics of SQL statements"
 
 LDFLAGS_SL += $(filter -lm, $(LIBS))
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index e0abe34bb6..9a375d796f 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -234,21 +234,23 @@ SET pg_stat_statements.track_utility = FALSE;
 SELECT query, calls, rows,
 wal_bytes > 0 as wal_bytes_generated,
 wal_records > 0 as wal_records_generated,
-wal_records = rows as wal_records_as_rows
+wal_records = rows as wal_records_as_rows,
+extra_wal_records IS NOT NULL as extra_wal_records_supported
 FROM pg_stat_statements ORDER BY query COLLATE "C";
-                           query                           | calls | rows | wal_bytes_generated | wal_records_generated | wal_records_as_rows 
------------------------------------------------------------+-------+------+---------------------+-----------------------+---------------------
- DELETE FROM pgss_test WHERE a > $1                        |     1 |    1 | t                   | t                     | t
- DROP TABLE pgss_test                                      |     1 |    0 | t                   | t                     | f
- INSERT INTO pgss_test VALUES(generate_series($1, $2), $3) |     1 |   10 | t                   | t                     | t
- SELECT pg_stat_statements_reset()                         |     1 |    1 | f                   | f                     | f
- SELECT query, calls, rows,                               +|     0 |    0 | f                   | f                     | t
- wal_bytes > $1 as wal_bytes_generated,                   +|       |      |                     |                       | 
- wal_records > $2 as wal_records_generated,               +|       |      |                     |                       | 
- wal_records = rows as wal_records_as_rows                +|       |      |                     |                       | 
- FROM pg_stat_statements ORDER BY query COLLATE "C"        |       |      |                     |                       | 
- SET pg_stat_statements.track_utility = FALSE              |     1 |    0 | f                   | f                     | t
- UPDATE pgss_test SET b = $1 WHERE a > $2                  |     1 |    3 | t                   | t                     | t
+                            query                             | calls | rows | wal_bytes_generated | wal_records_generated | wal_records_as_rows | extra_wal_records_supported 
+--------------------------------------------------------------+-------+------+---------------------+-----------------------+---------------------+-----------------------------
+ DELETE FROM pgss_test WHERE a > $1                           |     1 |    1 | t                   | t                     | t                   | t
+ DROP TABLE pgss_test                                         |     1 |    0 | t                   | t                     | f                   | t
+ INSERT INTO pgss_test VALUES(generate_series($1, $2), $3)    |     1 |   10 | t                   | t                     | t                   | t
+ SELECT pg_stat_statements_reset()                            |     1 |    1 | f                   | f                     | f                   | t
+ SELECT query, calls, rows,                                  +|     0 |    0 | f                   | f                     | t                   | t
+ wal_bytes > $1 as wal_bytes_generated,                      +|       |      |                     |                       |                     | 
+ wal_records > $2 as wal_records_generated,                  +|       |      |                     |                       |                     | 
+ wal_records = rows as wal_records_as_rows,                  +|       |      |                     |                       |                     | 
+ extra_wal_records IS NOT NULL as extra_wal_records_supported+|       |      |                     |                       |                     | 
+ FROM pg_stat_statements ORDER BY query COLLATE "C"           |       |      |                     |                       |                     | 
+ SET pg_stat_statements.track_utility = FALSE                 |     1 |    0 | f                   | f                     | t                   | t
+ UPDATE pgss_test SET b = $1 WHERE a > $2                     |     1 |    3 | t                   | t                     | t                   | t
 (7 rows)
 
 --
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.9--1.10.sql b/contrib/pg_stat_statements/pg_stat_statements--1.9--1.10.sql
new file mode 100644
index 0000000000..d8c713db58
--- /dev/null
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.9--1.10.sql
@@ -0,0 +1,58 @@
+/* contrib/pg_stat_statements/pg_stat_statements--1.9--1.10.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_stat_statements UPDATE TO '1.10'" to load this file. \quit
+
+/* First we have to remove them from the extension */
+ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements;
+ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements(boolean);
+
+/* Then we can drop them */
+DROP VIEW pg_stat_statements;
+DROP FUNCTION pg_stat_statements(boolean);
+
+/* Now redefine */
+CREATE FUNCTION pg_stat_statements(IN showtext boolean,
+    OUT userid oid,
+    OUT dbid oid,
+    OUT toplevel bool,
+    OUT queryid bigint,
+    OUT query text,
+    OUT plans int8,
+    OUT total_plan_time float8,
+    OUT min_plan_time float8,
+    OUT max_plan_time float8,
+    OUT mean_plan_time float8,
+    OUT stddev_plan_time float8,
+    OUT calls int8,
+    OUT total_exec_time float8,
+    OUT min_exec_time float8,
+    OUT max_exec_time float8,
+    OUT mean_exec_time float8,
+    OUT stddev_exec_time float8,
+    OUT rows int8,
+    OUT shared_blks_hit int8,
+    OUT shared_blks_read int8,
+    OUT shared_blks_dirtied int8,
+    OUT shared_blks_written int8,
+    OUT local_blks_hit int8,
+    OUT local_blks_read int8,
+    OUT local_blks_dirtied int8,
+    OUT local_blks_written int8,
+    OUT temp_blks_read int8,
+    OUT temp_blks_written int8,
+    OUT blk_read_time float8,
+    OUT blk_write_time float8,
+    OUT wal_records int8,
+	OUT extra_wal_records int8,
+    OUT wal_fpi int8,
+    OUT wal_bytes numeric
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_stat_statements_1_10'
+LANGUAGE C STRICT VOLATILE PARALLEL SAFE;
+
+CREATE VIEW pg_stat_statements AS
+  SELECT * FROM pg_stat_statements(true);
+
+GRANT SELECT ON pg_stat_statements TO PUBLIC;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 55786ae84f..6785eef814 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -88,7 +88,7 @@ PG_MODULE_MAGIC;
 #define PGSS_TEXT_FILE	PG_STAT_TMP_DIR "/pgss_query_texts.stat"
 
 /* Magic number identifying the stats file format */
-static const uint32 PGSS_FILE_HEADER = 0x20201227;
+static const uint32 PGSS_FILE_HEADER = 0x20220329;
 
 /* PostgreSQL major version number, changes in which invalidate all entries */
 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
@@ -121,7 +121,8 @@ typedef enum pgssVersion
 	PGSS_V1_2,
 	PGSS_V1_3,
 	PGSS_V1_8,
-	PGSS_V1_9
+	PGSS_V1_9,
+	PGSS_V1_10
 } pgssVersion;
 
 typedef enum pgssStoreKind
@@ -187,6 +188,7 @@ typedef struct Counters
 	double		blk_write_time; /* time spent writing, in msec */
 	double		usage;			/* usage factor */
 	int64		wal_records;	/* # of WAL records generated */
+	int64		extra_wal_recs;	/* # of extra WAL records generated */
 	int64		wal_fpi;		/* # of WAL full page images generated */
 	uint64		wal_bytes;		/* total amount of WAL generated in bytes */
 } Counters;
@@ -302,6 +304,7 @@ PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
 PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
 PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
 PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
+PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
 PG_FUNCTION_INFO_V1(pg_stat_statements);
 PG_FUNCTION_INFO_V1(pg_stat_statements_info);
 
@@ -1370,7 +1373,8 @@ pgss_store(const char *query, uint64 queryId,
 		e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
 		e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
 		e->counters.usage += USAGE_EXEC(total_time);
-		e->counters.wal_records += walusage->wal_records;
+		e->counters.wal_records += (walusage->wal_records - walusage->extra_wal_recs);
+		e->counters.extra_wal_recs += walusage->extra_wal_recs;
 		e->counters.wal_fpi += walusage->wal_fpi;
 		e->counters.wal_bytes += walusage->wal_bytes;
 
@@ -1422,7 +1426,8 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS)
 #define PG_STAT_STATEMENTS_COLS_V1_3	23
 #define PG_STAT_STATEMENTS_COLS_V1_8	32
 #define PG_STAT_STATEMENTS_COLS_V1_9	33
-#define PG_STAT_STATEMENTS_COLS			33	/* maximum of above */
+#define PG_STAT_STATEMENTS_COLS_V1_10	34
+#define PG_STAT_STATEMENTS_COLS			34	/* maximum of above */
 
 /*
  * Retrieve statement statistics.
@@ -1434,6 +1439,16 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS)
  * expected API version is identified by embedding it in the C name of the
  * function.  Unfortunately we weren't bright enough to do that for 1.1.
  */
+Datum
+pg_stat_statements_1_10(PG_FUNCTION_ARGS)
+{
+	bool		showtext = PG_GETARG_BOOL(0);
+
+	pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
+
+	return (Datum) 0;
+}
+
 Datum
 pg_stat_statements_1_9(PG_FUNCTION_ARGS)
 {
@@ -1547,6 +1562,10 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 			if (api_version != PGSS_V1_9)
 				elog(ERROR, "incorrect number of output arguments");
 			break;
+		case PG_STAT_STATEMENTS_COLS_V1_10:
+			if (api_version != PGSS_V1_10)
+				elog(ERROR, "incorrect number of output arguments");
+			break;
 		default:
 			elog(ERROR, "incorrect number of output arguments");
 	}
@@ -1753,6 +1772,8 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 			Datum		wal_bytes;
 
 			values[i++] = Int64GetDatumFast(tmp.wal_records);
+			if (api_version >= PGSS_V1_10)
+				values[i++] = Int64GetDatumFast(tmp.extra_wal_recs);
 			values[i++] = Int64GetDatumFast(tmp.wal_fpi);
 
 			snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
@@ -1771,6 +1792,7 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 					 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
 					 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
 					 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
+					 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
 					 -1 /* fail if you forget to update this assert */ ));
 
 		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
diff --git a/contrib/pg_stat_statements/pg_stat_statements.control b/contrib/pg_stat_statements/pg_stat_statements.control
index 2f1ce6ed50..0747e48138 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.control
+++ b/contrib/pg_stat_statements/pg_stat_statements.control
@@ -1,5 +1,5 @@
 # pg_stat_statements extension
 comment = 'track planning and execution statistics of all SQL statements executed'
-default_version = '1.9'
+default_version = '1.10'
 module_pathname = '$libdir/pg_stat_statements'
 relocatable = true
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index dffd2c8c18..86f5fd94da 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -122,7 +122,8 @@ SET pg_stat_statements.track_utility = FALSE;
 SELECT query, calls, rows,
 wal_bytes > 0 as wal_bytes_generated,
 wal_records > 0 as wal_records_generated,
-wal_records = rows as wal_records_as_rows
+wal_records = rows as wal_records_as_rows,
+extra_wal_records IS NOT NULL as extra_wal_records_supported
 FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 --
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 3a7e36bd13..ebf2ab9639 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -358,7 +358,19 @@
        <structfield>wal_records</structfield> <type>bigint</type>
       </para>
       <para>
-       Total number of WAL records generated by the statement
+       Total number of WAL records generated by the statement itself
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>extra_wal_records</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of extra WAL records generated during the statement's
+       execution due to hit into pruning of some page or when a new page is
+       required for a new xid in clog (the number of extra WAL records will
+       depends on previous query history)
       </para></entry>
      </row>
 
@@ -854,7 +866,7 @@ query           | SELECT pg_stat_statements_reset(0,0,0)
 calls           | 1
 total_exec_time | 0.189497
 rows            | 1
-hit_percent     | 
+hit_percent     |
 -[ RECORD 2 ]---+--------------------------------------------------&zwsp;---------------------------
 query           | SELECT query, calls, total_exec_time, rows, $1 * shared_blks_hit /          +
                 |                nullif(shared_blks_hit + shared_blks_read, $2) AS hit_percent+
@@ -862,7 +874,7 @@ query           | SELECT query, calls, total_exec_time, rows, $1 * shared_blks_h
 calls           | 0
 total_exec_time | 0
 rows            | 0
-hit_percent     | 
+hit_percent     |
 
 </screen>
  </sect2>
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 4656f1b3db..21a9f43c65 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -21,6 +21,7 @@
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "catalog/catalog.h"
+#include "executor/instrument.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
@@ -209,6 +210,11 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 			ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
 									   limited_ts, &nnewlpdead, NULL);
 
+			/* Take into account that heap_page_prune() just generated a new
+			 * wal record with zero xl_xid that is not related to current query.
+			 */
+			pgWalUsage.extra_wal_recs++;
+
 			/*
 			 * Report the number of tuples reclaimed to pgstats.  This is
 			 * ndeleted minus the number of newly-LP_DEAD-set items.
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 3d9088a704..3b70146737 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -38,6 +38,7 @@
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
+#include "executor/instrument.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
@@ -956,6 +957,12 @@ WriteZeroPageXlogRec(int pageno)
 	XLogBeginInsert();
 	XLogRegisterData((char *) (&pageno), sizeof(int));
 	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
+
+	/*
+	 * Consider that a new unrelated to current query wal record
+	 * with zero xl_xid has just been created.
+	 */
+	pgWalUsage.extra_wal_recs++;
 }
 
 /*
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index c5ff02a842..291d58efe3 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -268,6 +268,7 @@ WalUsageAdd(WalUsage *dst, WalUsage *add)
 	dst->wal_bytes += add->wal_bytes;
 	dst->wal_records += add->wal_records;
 	dst->wal_fpi += add->wal_fpi;
+	dst->extra_wal_recs += add->extra_wal_recs;
 }
 
 void
@@ -276,4 +277,5 @@ WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
 	dst->wal_bytes += add->wal_bytes - sub->wal_bytes;
 	dst->wal_records += add->wal_records - sub->wal_records;
 	dst->wal_fpi += add->wal_fpi - sub->wal_fpi;
+	dst->extra_wal_recs += add->extra_wal_recs - sub->extra_wal_recs;
 }
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 1b7157bdd1..2a07369edc 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -49,6 +49,12 @@ typedef struct WalUsage
 	int64		wal_records;	/* # of WAL records produced */
 	int64		wal_fpi;		/* # of WAL full page images produced */
 	uint64		wal_bytes;		/* size of WAL records produced */
+	/*
+	 * Number of generated WAL records with weak relation to current query.
+	 * In particular due to heap_page_prune_opt() or WriteZeroPageXlogRec()
+	 * This number depends on previous query history.
+	 */
+	int64		extra_wal_recs;
 } WalUsage;
 
 /* Flag bits included in InstrAlloc's instrument_options bitmask */

Reply via email to