From 5e5a97dfd280b31c1cc2e6f04f5efd6b50a895b7 Mon Sep 17 00:00:00 2001
From: Lukas Fittl <lukas@fittl.com>
Date: Sat, 28 Feb 2026 15:33:56 -0800
Subject: [PATCH v1] pg_buffercache: Add pg_buffercache_relation_stats()
 function

This function returns an aggregation of buffer contents, grouped on a
per-relfilenode basis. This is often useful to understand which tables
or indexes are currently in cache, and can show cache disruptions due
to query activity when sampled over time. The existing pg_buffercache()
function can be utilized for this by grouping the result, but due to
the amount of buffer entries (one per page) this can be prohibitively
expensive on large machines. Even on a small shared buffers (128MB) the
new function is 10x faster. Similar to the existing summary functions
this new function does not hold a lock whilst gathering its statistics.

Author: Lukas Fittl <lukas@fittl.com>
Reviewed by:
Discussion:
---
 contrib/pg_buffercache/Makefile               |   3 +-
 .../expected/pg_buffercache.out               |  14 ++
 contrib/pg_buffercache/meson.build            |   1 +
 .../pg_buffercache--1.7--1.8.sql              |  20 +++
 contrib/pg_buffercache/pg_buffercache.control |   2 +-
 contrib/pg_buffercache/pg_buffercache_pages.c | 134 ++++++++++++++++++
 contrib/pg_buffercache/sql/pg_buffercache.sql |   4 +
 doc/src/sgml/pgbuffercache.sgml               | 130 +++++++++++++++++
 src/tools/pgindent/typedefs.list              |   2 +
 9 files changed, 308 insertions(+), 2 deletions(-)
 create mode 100644 contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql

diff --git a/contrib/pg_buffercache/Makefile b/contrib/pg_buffercache/Makefile
index 0e618f66aec..7fd5cdfc43d 100644
--- a/contrib/pg_buffercache/Makefile
+++ b/contrib/pg_buffercache/Makefile
@@ -9,7 +9,8 @@ EXTENSION = pg_buffercache
 DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \
 	pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql \
 	pg_buffercache--1.3--1.4.sql pg_buffercache--1.4--1.5.sql \
-	pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql
+	pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql \
+	pg_buffercache--1.7--1.8.sql
 PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time"
 
 REGRESS = pg_buffercache pg_buffercache_numa
diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out
index 886dea770f6..cb5507a0d92 100644
--- a/contrib/pg_buffercache/expected/pg_buffercache.out
+++ b/contrib/pg_buffercache/expected/pg_buffercache.out
@@ -33,6 +33,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
  t
 (1 row)
 
+SELECT count(*) > 0 FROM pg_buffercache_relation_stats() WHERE buffers >= 0;
+ ?column? 
+----------
+ t
+(1 row)
+
 -- Check that the functions / views can't be accessed by default. To avoid
 -- having to create a dedicated user, use the pg_database_owner pseudo-role.
 SET ROLE pg_database_owner;
@@ -46,6 +52,8 @@ SELECT * FROM pg_buffercache_summary();
 ERROR:  permission denied for function pg_buffercache_summary
 SELECT * FROM pg_buffercache_usage_counts();
 ERROR:  permission denied for function pg_buffercache_usage_counts
+SELECT * FROM pg_buffercache_relation_stats();
+ERROR:  permission denied for function pg_buffercache_relation_stats
 RESET role;
 -- Check that pg_monitor is allowed to query view / function
 SET ROLE pg_monitor;
@@ -73,6 +81,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
  t
 (1 row)
 
+SELECT count(*) > 0 FROM pg_buffercache_relation_stats();
+ ?column? 
+----------
+ t
+(1 row)
+
 RESET role;
 ------
 ---- Test pg_buffercache_evict* and pg_buffercache_mark_dirty* functions
diff --git a/contrib/pg_buffercache/meson.build b/contrib/pg_buffercache/meson.build
index e681205abb2..361628b8bea 100644
--- a/contrib/pg_buffercache/meson.build
+++ b/contrib/pg_buffercache/meson.build
@@ -25,6 +25,7 @@ install_data(
   'pg_buffercache--1.4--1.5.sql',
   'pg_buffercache--1.5--1.6.sql',
   'pg_buffercache--1.6--1.7.sql',
+  'pg_buffercache--1.7--1.8.sql',
   'pg_buffercache.control',
   kwargs: contrib_data_args,
 )
diff --git a/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql b/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql
new file mode 100644
index 00000000000..9619d1c3e85
--- /dev/null
+++ b/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql
@@ -0,0 +1,20 @@
+/* contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.8'" to load this file. \quit
+
+CREATE FUNCTION pg_buffercache_relation_stats(
+    OUT relfilenode oid,
+    OUT reltablespace oid,
+    OUT reldatabase oid,
+    OUT relforknumber int2,
+    OUT buffers int4,
+    OUT buffers_dirty int4,
+    OUT buffers_pinned int4,
+    OUT usagecount_avg float8)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_buffercache_relation_stats'
+LANGUAGE C PARALLEL SAFE;
+
+REVOKE ALL ON FUNCTION pg_buffercache_relation_stats() FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_buffercache_relation_stats() TO pg_monitor;
diff --git a/contrib/pg_buffercache/pg_buffercache.control b/contrib/pg_buffercache/pg_buffercache.control
index 11499550945..d2fa8ba53ba 100644
--- a/contrib/pg_buffercache/pg_buffercache.control
+++ b/contrib/pg_buffercache/pg_buffercache.control
@@ -1,5 +1,5 @@
 # pg_buffercache extension
 comment = 'examine the shared buffer cache'
-default_version = '1.7'
+default_version = '1.8'
 module_pathname = '$libdir/pg_buffercache'
 relocatable = true
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 89b86855243..b16a421cb77 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -15,6 +15,7 @@
 #include "port/pg_numa.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
+#include "utils/hsearch.h"
 #include "utils/rel.h"
 
 
@@ -22,6 +23,7 @@
 #define NUM_BUFFERCACHE_PAGES_ELEM	9
 #define NUM_BUFFERCACHE_SUMMARY_ELEM 5
 #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
+#define NUM_BUFFERCACHE_RELATION_STATS_ELEM 8
 #define NUM_BUFFERCACHE_EVICT_ELEM 2
 #define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
 #define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
@@ -107,8 +109,33 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all);
+PG_FUNCTION_INFO_V1(pg_buffercache_relation_stats);
 
 
+/*
+ * Hash key for pg_buffercache_relation_stats — groups by relation identity.
+ */
+typedef struct
+{
+	RelFileNumber relfilenumber;
+	Oid			reltablespace;
+	Oid			reldatabase;
+	ForkNumber	forknum;
+} BufferRelStatsKey;
+
+/*
+ * Hash entry for pg_buffercache_relation_stats — accumulates per-relation
+ * buffer statistics.
+ */
+typedef struct
+{
+	BufferRelStatsKey key;		/* must be first */
+	int32		buffers;
+	int32		buffers_dirty;
+	int32		buffers_pinned;
+	int64		usagecount_total;
+} BufferRelStatsEntry;
+
 /* Only need to touch memory once per backend process lifetime */
 static bool firstNumaTouch = true;
 
@@ -958,3 +985,110 @@ pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(result);
 }
+
+/*
+ * pg_buffercache_relation_stats
+ *
+ * Produces a set of rows that summarize buffer cache usage per relation-fork
+ * combination. This enables monitoring scripts to only get the summary stats,
+ * instead of accumulating in a query with the full buffer information.
+ */
+Datum
+pg_buffercache_relation_stats(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	HTAB	   *relstats_hash;
+	HASHCTL		hash_ctl;
+	HASH_SEQ_STATUS hash_seq;
+	BufferRelStatsEntry *entry;
+	Datum		values[NUM_BUFFERCACHE_RELATION_STATS_ELEM];
+	bool		nulls[NUM_BUFFERCACHE_RELATION_STATS_ELEM] = {0};
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* Create a hash table to aggregate stats by relation-fork */
+	hash_ctl.keysize = sizeof(BufferRelStatsKey);
+	hash_ctl.entrysize = sizeof(BufferRelStatsEntry);
+	hash_ctl.hcxt = CurrentMemoryContext;
+
+	relstats_hash = hash_create("pg_buffercache relation stats",
+								128,
+								&hash_ctl,
+								HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	/* Single pass over all buffers */
+	for (int i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *bufHdr;
+		uint64		buf_state;
+		BufferRelStatsKey key;
+		bool		found;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Read buffer state without locking, same as pg_buffercache_summary
+		 * and pg_buffercache_usage_counts.  Locking wouldn't provide a
+		 * meaningfully more consistent result since buffers can change state
+		 * immediately after we release the lock.
+		 */
+		bufHdr = GetBufferDescriptor(i);
+		buf_state = pg_atomic_read_u64(&bufHdr->state);
+
+		/* Skip unused/invalid buffers */
+		if (!(buf_state & BM_VALID))
+			continue;
+
+		key.relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
+		key.reltablespace = bufHdr->tag.spcOid;
+		key.reldatabase = bufHdr->tag.dbOid;
+		key.forknum = BufTagGetForkNum(&bufHdr->tag);
+
+		entry = (BufferRelStatsEntry *) hash_search(relstats_hash,
+													&key,
+													HASH_ENTER,
+													&found);
+
+		if (!found)
+		{
+			entry->buffers = 0;
+			entry->buffers_dirty = 0;
+			entry->buffers_pinned = 0;
+			entry->usagecount_total = 0;
+		}
+
+		entry->buffers++;
+		entry->usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
+
+		if (buf_state & BM_DIRTY)
+			entry->buffers_dirty++;
+
+		if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
+			entry->buffers_pinned++;
+	}
+
+	/* Emit one row per hash entry */
+	hash_seq_init(&hash_seq, relstats_hash);
+	while ((entry = (BufferRelStatsEntry *) hash_seq_search(&hash_seq)) != NULL)
+	{
+		if (entry->buffers == 0)
+			continue;
+
+		values[0] = ObjectIdGetDatum(entry->key.relfilenumber);
+		values[1] = ObjectIdGetDatum(entry->key.reltablespace);
+		values[2] = ObjectIdGetDatum(entry->key.reldatabase);
+		values[3] = Int16GetDatum(entry->key.forknum);
+		values[4] = Int32GetDatum(entry->buffers);
+		values[5] = Int32GetDatum(entry->buffers_dirty);
+		values[6] = Int32GetDatum(entry->buffers_pinned);
+		values[7] = Float8GetDatum((double) entry->usagecount_total /
+								   entry->buffers);
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+							 values, nulls);
+	}
+
+	hash_destroy(relstats_hash);
+
+	return (Datum) 0;
+}
diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql
index 127d604905c..ea5950855d2 100644
--- a/contrib/pg_buffercache/sql/pg_buffercache.sql
+++ b/contrib/pg_buffercache/sql/pg_buffercache.sql
@@ -18,6 +18,8 @@ from pg_buffercache_summary();
 
 SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
 
+SELECT count(*) > 0 FROM pg_buffercache_relation_stats() WHERE buffers >= 0;
+
 -- Check that the functions / views can't be accessed by default. To avoid
 -- having to create a dedicated user, use the pg_database_owner pseudo-role.
 SET ROLE pg_database_owner;
@@ -26,6 +28,7 @@ SELECT * FROM pg_buffercache_os_pages;
 SELECT * FROM pg_buffercache_pages() AS p (wrong int);
 SELECT * FROM pg_buffercache_summary();
 SELECT * FROM pg_buffercache_usage_counts();
+SELECT * FROM pg_buffercache_relation_stats();
 RESET role;
 
 -- Check that pg_monitor is allowed to query view / function
@@ -34,6 +37,7 @@ SELECT count(*) > 0 FROM pg_buffercache;
 SELECT count(*) > 0 FROM pg_buffercache_os_pages;
 SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
 SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
+SELECT count(*) > 0 FROM pg_buffercache_relation_stats();
 RESET role;
 
 
diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml
index 1e9aee10275..921ba9b5306 100644
--- a/doc/src/sgml/pgbuffercache.sgml
+++ b/doc/src/sgml/pgbuffercache.sgml
@@ -31,6 +31,10 @@
   <primary>pg_buffercache_usage_counts</primary>
  </indexterm>
 
+ <indexterm>
+  <primary>pg_buffercache_relation_stats</primary>
+ </indexterm>
+
  <indexterm>
   <primary>pg_buffercache_evict</primary>
  </indexterm>
@@ -63,6 +67,7 @@
   <structname>pg_buffercache_numa</structname> views), the
   <function>pg_buffercache_summary()</function> function, the
   <function>pg_buffercache_usage_counts()</function> function, the
+  <function>pg_buffercache_relation_stats()</function> function, the
   <function>pg_buffercache_evict()</function> function, the
   <function>pg_buffercache_evict_relation()</function> function, the
   <function>pg_buffercache_evict_all()</function> function, the
@@ -102,6 +107,12 @@
   count.
  </para>
 
+ <para>
+  The <function>pg_buffercache_relation_stats()</function> function returns a
+  set of rows summarizing buffer cache usage aggregated by relation and fork
+  number.
+ </para>
+
  <para>
   By default, use of the above functions is restricted to superusers and roles
   with privileges of the <literal>pg_monitor</literal> role. Access may be
@@ -564,6 +575,125 @@
   </para>
  </sect2>
 
+ <sect2 id="pgbuffercache-relation-stats">
+  <title>The <function>pg_buffercache_relation_stats()</function> Function</title>
+
+  <para>
+   The definitions of the columns exposed by the function are shown in
+   <xref linkend="pgbuffercache_relation_stats-columns"/>.
+  </para>
+
+  <table id="pgbuffercache_relation_stats-columns">
+   <title><function>pg_buffercache_relation_stats()</function> Output Columns</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>relfilenode</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>)
+      </para>
+      <para>
+       Filenode number of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>reltablespace</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link>.<structfield>oid</structfield>)
+      </para>
+      <para>
+       Tablespace OID of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>reldatabase</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-database"><structname>pg_database</structname></link>.<structfield>oid</structfield>)
+      </para>
+      <para>
+       Database OID of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>relforknumber</structfield> <type>smallint</type>
+      </para>
+      <para>
+       Fork number within the relation;  see
+       <filename>common/relpath.h</filename>
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers_dirty</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of dirty buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers_pinned</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of pinned buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>usagecount_avg</structfield> <type>float8</type>
+      </para>
+      <para>
+       Average usage count of the relation's buffers
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The <function>pg_buffercache_relation_stats()</function> function returns a
+   set of rows summarizing the state of all shared buffers, aggregated by
+   relation and fork number.  Similar and more detailed information is
+   provided by the <structname>pg_buffercache</structname> view, but
+   <function>pg_buffercache_relation_stats()</function> is significantly
+   cheaper.
+  </para>
+
+  <para>
+   Like the <structname>pg_buffercache</structname> view,
+   <function>pg_buffercache_relation_stats()</function> does not acquire buffer
+   manager locks. Therefore concurrent activity can lead to minor inaccuracies
+   in the result.
+  </para>
+ </sect2>
+
  <sect2 id="pgbuffercache-pg-buffercache-evict">
   <title>The <function>pg_buffercache_evict()</function> Function</title>
   <para>
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 77e3c04144e..19a87f702ee 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -357,6 +357,8 @@ BufferHeapTupleTableSlot
 BufferLockMode
 BufferLookupEnt
 BufferManagerRelation
+BufferRelStatsEntry
+BufferRelStatsKey
 BufferStrategyControl
 BufferTag
 BufferUsage
-- 
2.47.1

