Hi everyone,

I believe we should also include this check in theĀ pgss_ExecutorEnd() function because sampling in pgss_ExecutorEnd() ensures that a query not initially sampled in pgss_ExecutorStart() can still be logged if it meets the pg_stat_statements.sample_rate criteria. This approach adds flexibility by allowing critical queries to be captured while maintaining efficient sampling.

I attached new version of patch.

--
Best regards,
Ilia Evdokimov,
Tantor Labs LLC.
From c33e1ae07e8eb4f797b47e7deb07e6322b1375a3 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Tue, 19 Nov 2024 12:53:52 +0300
Subject: [PATCH] Allow setting sample ratio for pg_stat_statements

New configuration parameter pg_stat_statements.sample_ratio makes it
possible to control just a fraction of the queries meeting the configured
threshold, to reduce the amount of controlling.
---
 .../pg_stat_statements/pg_stat_statements.c   | 25 ++++++++++++++++---
 doc/src/sgml/pgstatstatements.sgml            | 17 +++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 49c657b3e0..42b3fee815 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -49,6 +49,7 @@
 
 #include "access/parallel.h"
 #include "catalog/pg_authid.h"
+#include "common/pg_prng.h"
 #include "common/int.h"
 #include "executor/instrument.h"
 #include "funcapi.h"
@@ -294,7 +295,10 @@ static bool pgss_track_utility = true;	/* whether to track utility commands */
 static bool pgss_track_planning = false;	/* whether to track planning
 											 * duration */
 static bool pgss_save = true;	/* whether to save stats across shutdown */
+static double pgss_sample_rate = 1;
 
+/* Is the current top-level query to be sampled? */
+static bool current_query_sampled = false;
 
 #define pgss_enabled(level) \
 	(!IsParallelWorker() && \
@@ -414,6 +418,19 @@ _PG_init(void)
 							NULL,
 							NULL);
 
+	DefineCustomRealVariable("pg_stat_statements.sample_rate",
+							 "Fraction of queries to process.",
+							 NULL,
+							 &pgss_sample_rate,
+							 1.0,
+							 0.0,
+							 1.0,
+							 PGC_SUSET,
+							 0,
+							 NULL,
+							 NULL,
+							 NULL);
+
 	DefineCustomEnumVariable("pg_stat_statements.track",
 							 "Selects which statements are tracked by pg_stat_statements.",
 							 NULL,
@@ -989,6 +1006,8 @@ pgss_planner(Query *parse,
 static void
 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
+	current_query_sampled = (pg_prng_double(&pg_global_prng_state) < pgss_sample_rate);
+
 	if (prev_ExecutorStart)
 		prev_ExecutorStart(queryDesc, eflags);
 	else
@@ -999,7 +1018,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
 	 * counting of optimizable statements that are directly contained in
 	 * utility statements.
 	 */
-	if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
+	if (current_query_sampled && pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
 	{
 		/*
 		 * Set up to track total elapsed time in ExecutorRun.  Make sure the
@@ -1068,8 +1087,8 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
 {
 	uint64		queryId = queryDesc->plannedstmt->queryId;
 
-	if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
-		pgss_enabled(nesting_level))
+	if (current_query_sampled && queryId != UINT64CONST(0) &&
+		queryDesc->totaltime && pgss_enabled(nesting_level))
 	{
 		/*
 		 * Make sure stats accumulation is done.  (Note: it's okay if several
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..1e2533a802 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -936,6 +936,23 @@
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.sample_rate</varname> (<type>real</type>)
+     <indexterm>
+      <primary><varname>pg_stat_statements.sample_rate</varname> configuration parameter</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.sample_rate</varname> causes pg_stat_statements to only
+      control a fraction of the statements in each session.  The default is 1,
+      meaning control all the queries. Only superusers can change this setting.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term>
      <varname>pg_stat_statements.save</varname> (<type>boolean</type>)
-- 
2.34.1

Reply via email to