Hi everyone,
I believe we should also include this check in the pgss_ExecutorEnd()
function because sampling in pgss_ExecutorEnd() ensures that a query not
initially sampled in pgss_ExecutorStart() can still be logged if it
meets the pg_stat_statements.sample_rate criteria. This approach adds
flexibility by allowing critical queries to be captured while
maintaining efficient sampling.
I have attached a new version of the patch.
--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From c33e1ae07e8eb4f797b47e7deb07e6322b1375a3 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Tue, 19 Nov 2024 12:53:52 +0300
Subject: [PATCH] Allow setting sample rate for pg_stat_statements
New configuration parameter pg_stat_statements.sample_rate makes it
possible to track just a fraction of the executed queries, to reduce
the overhead of tracking.
---
.../pg_stat_statements/pg_stat_statements.c | 25 ++++++++++++++++---
doc/src/sgml/pgstatstatements.sgml | 17 +++++++++++++
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 49c657b3e0..42b3fee815 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -49,6 +49,7 @@
#include "access/parallel.h"
#include "catalog/pg_authid.h"
+#include "common/pg_prng.h"
#include "common/int.h"
#include "executor/instrument.h"
#include "funcapi.h"
@@ -294,7 +295,10 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
+static double pgss_sample_rate = 1;
+/* Is the current top-level query to be sampled? */
+static bool current_query_sampled = false;
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -414,6 +418,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomRealVariable("pg_stat_statements.sample_rate",
+ "Fraction of queries to process.",
+ NULL,
+ &pgss_sample_rate,
+ 1.0,
+ 0.0,
+ 1.0,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
DefineCustomEnumVariable("pg_stat_statements.track",
"Selects which statements are tracked by pg_stat_statements.",
NULL,
@@ -989,6 +1006,8 @@ pgss_planner(Query *parse,
static void
pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
+ current_query_sampled = (pg_prng_double(&pg_global_prng_state) < pgss_sample_rate);
+
if (prev_ExecutorStart)
prev_ExecutorStart(queryDesc, eflags);
else
@@ -999,7 +1018,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
* counting of optimizable statements that are directly contained in
* utility statements.
*/
- if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
+ if (current_query_sampled && pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
{
/*
* Set up to track total elapsed time in ExecutorRun. Make sure the
@@ -1068,8 +1087,8 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
{
uint64 queryId = queryDesc->plannedstmt->queryId;
- if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
- pgss_enabled(nesting_level))
+ if (current_query_sampled && queryId != UINT64CONST(0) &&
+ queryDesc->totaltime && pgss_enabled(nesting_level))
{
/*
* Make sure stats accumulation is done. (Note: it's okay if several
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..1e2533a802 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -936,6 +936,23 @@
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <varname>pg_stat_statements.sample_rate</varname> (<type>real</type>)
+ <indexterm>
+ <primary><varname>pg_stat_statements.sample_rate</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <varname>pg_stat_statements.sample_rate</varname> causes pg_stat_statements to only
+ track a fraction of the statements in each session. The default is 1,
+ meaning all queries are tracked. Only superusers can change this setting.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term>
<varname>pg_stat_statements.save</varname> (<type>boolean</type>)
--
2.34.1