I completely forgot about ordering pg_stat_statements in the test, which
is why the test failed in [0]. I've added ORDER BY query COLLATE "C" to
avoid any non-deterministic ordering in the table.
[0]: https://cirrus-ci.com/task/5724458477944832
--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From be8a69ae36d0a20c945ff12bb385d634ce50a84e Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.com>
Date: Tue, 7 Jan 2025 16:43:34 +0300
Subject: [PATCH] Allow setting sample ratio for pg_stat_statements
New configuration parameter pg_stat_statements.sample_ratio makes it
possible to track just a fraction of the queries meeting the configured
threshold, to reduce the amount of tracking.
---
.../pg_stat_statements/expected/select.out | 37 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 38 ++++++++++++++++++-
contrib/pg_stat_statements/sql/select.sql | 11 ++++++
doc/src/sgml/pgstatstatements.sgml | 18 +++++++++
4 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034..8485a7c451 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -153,6 +153,43 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
(1 row)
+--
+-- sample statements
+--
+SET pg_stat_statements.sample_rate = 0.0;
+SELECT 1 AS "int";
+ int
+-----
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(1 row)
+
+SET pg_stat_statements.sample_rate = 1.0;
+SELECT 1 AS "int";
+ int
+-----
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT $1 AS "int" | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
--
-- queries with locking clauses
--
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb..4bab2a74ea 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -50,6 +50,7 @@
#include "access/parallel.h"
#include "catalog/pg_authid.h"
#include "common/int.h"
+#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "jit/jit.h"
@@ -255,6 +256,7 @@ typedef struct pgssSharedState
/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
static int nesting_level = 0;
+static double random_sample = 0.0;
/* Saved hook values */
static shmem_request_hook_type prev_shmem_request_hook = NULL;
@@ -294,12 +296,17 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
+static double pgss_sample_rate = 1;
+
+/* Is the current top-level query to be sampled? */
+static bool current_query_sampled = false;
#define pgss_enabled(level) \
(!IsParallelWorker() && \
(pgss_track == PGSS_TRACK_ALL || \
- (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
+ (pgss_track == PGSS_TRACK_TOP && (level) == 0)) && \
+ current_query_sampled)
#define record_gc_qtexts() \
do { \
@@ -414,6 +421,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomRealVariable("pg_stat_statements.sample_rate",
+ "Fraction of queries to track.",
+ NULL,
+ &pgss_sample_rate,
+ 1.0,
+ 0.0,
+ 1.0,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
DefineCustomEnumVariable("pg_stat_statements.track",
"Selects which statements are tracked by pg_stat_statements.",
NULL,
@@ -835,6 +855,11 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
if (prev_post_parse_analyze_hook)
prev_post_parse_analyze_hook(pstate, query, jstate);
+ random_sample = pg_prng_double(&pg_global_prng_state);
+
+ if (nesting_level == 0)
+ current_query_sampled = random_sample < pgss_sample_rate;
+
/* Safety check... */
if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
return;
@@ -889,6 +914,9 @@ pgss_planner(Query *parse,
{
PlannedStmt *result;
+ if (nesting_level == 0)
+ current_query_sampled = random_sample < pgss_sample_rate;
+
/*
* We can't process the query if no query_string is provided, as
* pgss_store needs it. We also ignore query without queryid, as it would
@@ -1111,7 +1139,13 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
uint64 saved_queryId = pstmt->queryId;
int saved_stmt_location = pstmt->stmt_location;
int saved_stmt_len = pstmt->stmt_len;
- bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
+ bool enabled;
+
+ if (nesting_level == 0)
+ current_query_sampled = random_sample < pgss_sample_rate;
+
+ /* current_query_sampled is in pgss_enabled */
+ enabled = pgss_track_utility && pgss_enabled(nesting_level);
/*
* Force utility statements to get queryId zero. We do this even in cases
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e2..def00d515d 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -59,6 +59,17 @@ DEALLOCATE pgss_test;
SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+--
+-- sample statements
+--
+SET pg_stat_statements.sample_rate = 0.0;
+SELECT 1 AS "int";
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+SET pg_stat_statements.sample_rate = 1.0;
+SELECT 1 AS "int";
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
--
-- queries with locking clauses
--
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..8ade4e3ced 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -936,6 +936,24 @@
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.sample_rate</varname> (<type>real</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.sample_rate</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <varname>pg_stat_statements.sample_rate</varname> causes pg_stat_statements to only
+ track a fraction of the statements in each session. The default is 1,
+ meaning track all the queries. In case of nested statements,
+ either all will be tracked or none. Only superusers can change this setting.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term>
<varname>pg_stat_statements.save</varname> (<type>boolean</type>)
--
2.34.1