From e76af0c23888cf8232b59ce8cbb65f6008d544a4 Mon Sep 17 00:00:00 2001
From: Daniel Farina <daniel@fdr.io>
Date: Tue, 2 Oct 2012 23:06:51 -0700
Subject: [PATCH] Add pg_stat_statements calls underestimation propagation

This tries to establish a maximum under-estimate of the number of
calls for a given pg_stat_statements entry.  That means the number of
calls to the canonical form of the query is between 'calls' and 'calls
+ calls_underest'.

This is useful to determine when accumulating statistics if a given
record is bouncing in and out of the pg_stat_statements table, having
its ncalls reset all the time, but also having calls_underest grow
very rapidly.

Records that always stay in pg_stat_statements will have a
calls_underest that do not change at all.

An interesting case is when a query that usually is called is not
called for a while, and falls out of pg_stat_statements.  The result
can be that the query with the most 'calls' can also have more
uncertainty than the query with the second most calls, which is also
exactly the truth in reality.

Unceremoniously bundled into this patch is a reduction in the minimum
table size for pg_stat_statements, from 100 to 1.  Using tiny values
is not likely to be seen in production, but makes testing the patch a
lot easier in some situations.

Signed-off-by: Daniel Farina <daniel@fdr.io>
---
 contrib/pg_stat_statements/Makefile                |    1 +
 .../pg_stat_statements--1.1--1.2.sql               |   43 +++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements--1.2.sql |   44 ++++++++++++++++++++
 contrib/pg_stat_statements/pg_stat_statements.c    |   37 +++++++++++++++-
 .../pg_stat_statements/pg_stat_statements.control  |    2 +-
 5 files changed, 124 insertions(+), 3 deletions(-)
 create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.1--1.2.sql
 create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.2.sql

diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index e8aed61..8656c17 100644
*** a/contrib/pg_stat_statements/Makefile
--- b/contrib/pg_stat_statements/Makefile
***************
*** 5,10 **** OBJS = pg_stat_statements.o
--- 5,11 ----
  
  EXTENSION = pg_stat_statements
  DATA = pg_stat_statements--1.1.sql pg_stat_statements--1.0--1.1.sql \
+ 	pg_stat_statements--1.2.sql pg_stat_statements--1.1--1.2.sql \
  	pg_stat_statements--unpackaged--1.0.sql
  
  ifdef USE_PGXS
*** /dev/null
--- b/contrib/pg_stat_statements/pg_stat_statements--1.1--1.2.sql
***************
*** 0 ****
--- 1,43 ----
+ /* contrib/pg_stat_statements/pg_stat_statements--1.1--1.2.sql */
+ 
+ -- complain if script is sourced in psql, rather than via ALTER EXTENSION
+ \echo Use "ALTER EXTENSION pg_stat_statements UPDATE" to load this file. \quit
+ 
+ /* First we have to remove them from the extension */
+ ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements;
+ ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements();
+ 
+ /* Then we can drop them */
+ DROP VIEW pg_stat_statements;
+ DROP FUNCTION pg_stat_statements();
+ 
+ /* Now redefine */
+ CREATE FUNCTION pg_stat_statements(
+     OUT userid oid,
+     OUT dbid oid,
+     OUT query text,
+     OUT calls int8,
+     OUT calls_underest int8,
+     OUT total_time float8,
+     OUT rows int8,
+     OUT shared_blks_hit int8,
+     OUT shared_blks_read int8,
+     OUT shared_blks_dirtied int8,
+     OUT shared_blks_written int8,
+     OUT local_blks_hit int8,
+     OUT local_blks_read int8,
+     OUT local_blks_dirtied int8,
+     OUT local_blks_written int8,
+     OUT temp_blks_read int8,
+     OUT temp_blks_written int8,
+     OUT blk_read_time float8,
+     OUT blk_write_time float8
+ )
+ RETURNS SETOF record
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C;
+ 
+ CREATE VIEW pg_stat_statements AS
+   SELECT * FROM pg_stat_statements();
+ 
+ GRANT SELECT ON pg_stat_statements TO PUBLIC;
*** /dev/null
--- b/contrib/pg_stat_statements/pg_stat_statements--1.2.sql
***************
*** 0 ****
--- 1,44 ----
+ /* contrib/pg_stat_statements/pg_stat_statements--1.2.sql */
+ 
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "CREATE EXTENSION pg_stat_statements" to load this file. \quit
+ 
+ -- Register functions.
+ CREATE FUNCTION pg_stat_statements_reset()
+ RETURNS void
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C;
+ 
+ CREATE FUNCTION pg_stat_statements(
+     OUT userid oid,
+     OUT dbid oid,
+     OUT query text,
+     OUT calls int8,
+     OUT calls_underest int8,
+     OUT total_time float8,
+     OUT rows int8,
+     OUT shared_blks_hit int8,
+     OUT shared_blks_read int8,
+     OUT shared_blks_dirtied int8,
+     OUT shared_blks_written int8,
+     OUT local_blks_hit int8,
+     OUT local_blks_read int8,
+     OUT local_blks_dirtied int8,
+     OUT local_blks_written int8,
+     OUT temp_blks_read int8,
+     OUT temp_blks_written int8,
+     OUT blk_read_time float8,
+     OUT blk_write_time float8
+ )
+ RETURNS SETOF record
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C;
+ 
+ -- Register a view on the function for ease of use.
+ CREATE VIEW pg_stat_statements AS
+   SELECT * FROM pg_stat_statements();
+ 
+ GRANT SELECT ON pg_stat_statements TO PUBLIC;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
*** a/contrib/pg_stat_statements/pg_stat_statements.c
--- b/contrib/pg_stat_statements/pg_stat_statements.c
***************
*** 101,106 **** typedef struct pgssHashKey
--- 101,107 ----
  typedef struct Counters
  {
  	int64		calls;			/* # of times executed */
+ 	int64		calls_underest;	/* max underestimation of # of executions */
  	double		total_time;		/* total execution time, in msec */
  	int64		rows;			/* total # of retrieved or affected rows */
  	int64		shared_blks_hit;	/* # of shared buffer hits */
***************
*** 139,144 **** typedef struct pgssEntry
--- 140,154 ----
  typedef struct pgssSharedState
  {
  	LWLockId	lock;			/* protects hashtable search/modification */
+ 
+ 	/*
+ 	 * cache of maximum calls-counter underestimation in hashtab
+ 	 *
+ 	 * Only accessed and changed along with the hash table, so also protected
+ 	 * by 'lock'.
+ 	 */
+ 	int64		calls_max_underest;
+ 
  	int			query_size;		/* max query length in bytes */
  	double		cur_median_usage;		/* current median usage in hashtable */
  } pgssSharedState;
***************
*** 292,298 **** _PG_init(void)
  							NULL,
  							&pgss_max,
  							1000,
! 							100,
  							INT_MAX,
  							PGC_POSTMASTER,
  							0,
--- 302,308 ----
  							NULL,
  							&pgss_max,
  							1000,
! 							1,
  							INT_MAX,
  							PGC_POSTMASTER,
  							0,
***************
*** 1066,1072 **** pg_stat_statements_reset(PG_FUNCTION_ARGS)
  }
  
  #define PG_STAT_STATEMENTS_COLS_V1_0	14
! #define PG_STAT_STATEMENTS_COLS			18
  
  /*
   * Retrieve statement statistics.
--- 1076,1082 ----
  }
  
  #define PG_STAT_STATEMENTS_COLS_V1_0	14
! #define PG_STAT_STATEMENTS_COLS			19
  
  /*
   * Retrieve statement statistics.
***************
*** 1163,1168 **** pg_stat_statements(PG_FUNCTION_ARGS)
--- 1173,1179 ----
  			continue;
  
  		values[i++] = Int64GetDatumFast(tmp.calls);
+ 		values[i++] = Int64GetDatumFast(tmp.calls_underest);
  		values[i++] = Float8GetDatumFast(tmp.total_time);
  		values[i++] = Int64GetDatumFast(tmp.rows);
  		values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
***************
*** 1251,1256 **** entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
--- 1262,1271 ----
  		memset(&entry->counters, 0, sizeof(Counters));
  		/* set the appropriate initial usage count */
  		entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
+ 
+ 		/* propagate calls under-estimation bound */
+ 		entry->counters.calls_underest = pgss->calls_max_underest;
+ 
  		/* re-initialize the mutex each time ... we assume no one using it */
  		SpinLockInit(&entry->mutex);
  		/* ... and don't forget the query text */
***************
*** 1283,1288 **** entry_cmp(const void *lhs, const void *rhs)
--- 1298,1306 ----
  /*
   * Deallocate least used entries.
   * Caller must hold an exclusive lock on pgss->lock.
+  *
+  * Also increases the underestimation maximum in pgss as a side
+  * effect, if necessary.
   */
  static void
  entry_dealloc(void)
***************
*** 1305,1316 **** entry_dealloc(void)
--- 1323,1349 ----
  	hash_seq_init(&hash_seq, pgss_hash);
  	while ((entry = hash_seq_search(&hash_seq)) != NULL)
  	{
+ 		const Counters *cur_counts = &entry->counters;
+ 		int64 cur_underest;
+ 
  		entries[i++] = entry;
+ 
  		/* "Sticky" entries get a different usage decay rate. */
  		if (entry->counters.calls == 0)
  			entry->counters.usage *= STICKY_DECREASE_FACTOR;
  		else
  			entry->counters.usage *= USAGE_DECREASE_FACTOR;
+ 
+ 		/*
+ 		 * Update global calls estimation state, if necessary.
+ 		 *
+ 		 * NB: It is necessary to compute the uncertainty over *all*
+ 		 * entries rather than from just those that will undergo
+ 		 * eviction.  This is because the metric used to choose
+ 		 * eviction is different than the metric reported to the user.
+ 		 */
+ 		cur_underest = cur_counts->calls + cur_counts->calls_underest;
+ 		pgss->calls_max_underest = Max(pgss->calls_max_underest, cur_underest);
  	}
  
  	qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
*** a/contrib/pg_stat_statements/pg_stat_statements.control
--- b/contrib/pg_stat_statements/pg_stat_statements.control
***************
*** 1,5 ****
  # pg_stat_statements extension
  comment = 'track execution statistics of all SQL statements executed'
! default_version = '1.1'
  module_pathname = '$libdir/pg_stat_statements'
  relocatable = true
--- 1,5 ----
  # pg_stat_statements extension
  comment = 'track execution statistics of all SQL statements executed'
! default_version = '1.2'
  module_pathname = '$libdir/pg_stat_statements'
  relocatable = true
