From 7a5244805d4694a74f073a89b2e808310810bbd1 Mon Sep 17 00:00:00 2001
From: "Sami Imseih (AWS)" <simseih@amazon.com>
Date: Mon, 14 Mar 2022 14:26:20 +0000
Subject: [PATCH v7 3/4] Expose indexes being processed in a VACUUM operation.

Add a new view called pg_stat_progress_vacuum_index that shows the
indexrelid being vacuumed or cleaned up during a vacuum. The view also shows
the number of tuples removed from the index during the vacuuming indexes phase.

Author: Sami Imseih, based on suggestions by Nathan Bossart, Peter Geoghegan and Masahiko Sawada
Reviewed-by: Nathan Bossart, Justin Pryzby
---
 doc/src/sgml/monitoring.sgml          | 108 ++++++++++++++++++++++++++
 src/backend/access/gin/ginvacuum.c    |   3 +
 src/backend/access/gist/gistvacuum.c  |   3 +
 src/backend/access/hash/hash.c        |   1 +
 src/backend/access/heap/vacuumlazy.c  |  14 ++++
 src/backend/access/nbtree/nbtree.c    |   1 +
 src/backend/access/spgist/spgvacuum.c |   4 +
 src/backend/catalog/system_views.sql  |  18 ++++-
 src/backend/commands/vacuumparallel.c |  12 +++
 src/include/commands/progress.h       |   3 +
 src/test/regress/expected/rules.out   |  17 +++-
 11 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 1acc741da9..240d2438d5 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -379,6 +379,15 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_progress_vacuum_index</structname><indexterm><primary>pg_stat_progress_vacuum_index</primary></indexterm></entry>
+      <entry>One row for each backend (including autovacuum worker processes) that is
+       currently performing the <literal>vacuuming indexes</literal> or
+       <literal>cleaning up indexes</literal> phase of the vacuum, showing current progress.
+       See <xref linkend='vacuum-progress-reporting'/>.
+      </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_progress_cluster</structname><indexterm><primary>pg_stat_progress_cluster</primary></indexterm></entry>
       <entry>One row for each backend running
@@ -6256,6 +6265,105 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
    </tgroup>
   </table>
 
+  <indexterm>
+   <primary>pg_stat_progress_vacuum_index</primary>
+  </indexterm>
+
+  <para>
+   Whenever <command>VACUUM</command> is running, the
+   <structname>pg_stat_progress_vacuum_index</structname> view will contain
+   one row for each backend (including autovacuum worker processes and parallel vacuum workers) that is
+   currently performing the <literal>vacuuming indexes</literal> or
+   <literal>cleaning up indexes</literal> phase of the vacuum.
+  </para>
+
+  <table id="pg-stat-progress-vacuum-index-view" xreflabel="pg_stat_progress_vacuum_index">
+   <title><structname>pg_stat_progress_vacuum_index</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>pid</structfield> <type>integer</type>
+      </para>
+      <para>
+       Process ID of backend.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>datid</structfield> <type>oid</type>
+      </para>
+      <para>
+       OID of the database to which this backend is connected.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>datname</structfield> <type>name</type>
+      </para>
+      <para>
+       Name of the database to which this backend is connected.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>indexrelid</structfield> <type>oid</type>
+      </para>
+      <para>
+       OID of the index being processed in the ongoing phase of the vacuum.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>leader_pid</structfield> <type>integer</type>
+      </para>
+      <para>
+       Process ID of the parallel vacuum leader, if this process is a
+       parallel vacuum worker.  Otherwise, this is the same as
+       <structfield>pid</structfield>.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>phase</structfield> <type>text</type>
+      </para>
+      <para>
+       Current processing phase of a vacuum. Only the
+       <literal>vacuuming indexes</literal> or <literal>cleaning up indexes</literal>
+       phase will be listed in this view. See <xref linkend="vacuum-phases"/>.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tuples_removed</structfield> <type>bigint</type>
+      </para>
+      <para>
+       The number of index tuples removed by the <literal>vacuuming indexes</literal> phase.
+       This field is <literal>0</literal> during the <literal>cleaning up indexes</literal>
+       phase.
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
   <table id="vacuum-phases">
    <title>VACUUM Phases</title>
    <tgroup cols="2">
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index b4fa5f6bf8..1d5d003780 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -17,8 +17,10 @@
 #include "access/gin_private.h"
 #include "access/ginxlog.h"
 #include "access/xloginsert.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
@@ -60,6 +62,7 @@ ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items,
 		if (gvs->callback(items + i, gvs->callback_state))
 		{
 			gvs->result->tuples_removed += 1;
+			pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, gvs->result->tuples_removed);
 			if (!tmpitems)
 			{
 				/*
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index aac4afab8f..8a0f23388b 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -17,9 +17,11 @@
 #include "access/genam.h"
 #include "access/gist_private.h"
 #include "access/transam.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "lib/integerset.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 #include "utils/memutils.h"
@@ -375,6 +377,7 @@ restart:
 			END_CRIT_SECTION();
 
 			vstate->stats->tuples_removed += ntodelete;
+			pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, vstate->stats->tuples_removed);
 			/* must recompute maxoff */
 			maxoff = PageGetMaxOffsetNumber(page);
 		}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index a259a301fa..23dacee52e 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -632,6 +632,7 @@ loop_top:
 	stats->estimated_count = false;
 	stats->num_index_tuples = num_index_tuples;
 	stats->tuples_removed += tuples_removed;
+	pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, stats->tuples_removed);
 	/* hashvacuumcleanup will fill in num_pages */
 
 	return stats;
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 65e4440dc3..f33faabbe9 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -348,6 +348,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 
 	pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
 								  RelationGetRelid(rel));
+	pgstat_progress_update_param(PROGRESS_VACUUM_LEADER_PID, MyProcPid);
 
 	/*
 	 * Get OldestXmin cutoff, which is used to determine which deleted tuples
@@ -2326,12 +2327,19 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
 			Relation	indrel = vacrel->indrels[idx];
 			IndexBulkDeleteResult *istat = vacrel->indstats[idx];
 
+			/* Advertise the index being vacuumed in non-parallel vacuum */
+			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, RelationGetRelid(indrel));
+
 			vacrel->indstats[idx] =
 				lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples,
 									  vacrel);
 
 			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_COMPLETED, indexes_processed++);
 
+			/* Advertise we are done vacuuming indexes in non-parallel vacuum */
+			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, 0);
+			pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, 0);
+
 			if (lazy_check_wraparound_failsafe(vacrel))
 			{
 				/* Wraparound emergency -- end current index scan */
@@ -2700,11 +2708,17 @@ lazy_cleanup_all_indexes(LVRelState *vacrel)
 			Relation	indrel = vacrel->indrels[idx];
 			IndexBulkDeleteResult *istat = vacrel->indstats[idx];
 
+			/* Advertise the index being cleaned in non-parallel vacuum */
+			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, RelationGetRelid(indrel));
+
 			vacrel->indstats[idx] =
 				lazy_cleanup_one_index(indrel, istat, reltuples,
 									   estimated_count, vacrel);
 
 			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_COMPLETED, indexes_processed++);
+
+			/* Advertise we are done cleaning indexes in non-parallel vacuum */
+			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, 0);
 		}
 	}
 	else
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c9b4964c1e..09edf49082 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1273,6 +1273,7 @@ backtrack:
 								nupdatable);
 
 			stats->tuples_removed += nhtidsdead;
+			pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, stats->tuples_removed);
 			/* must recompute maxoff */
 			maxoff = PageGetMaxOffsetNumber(page);
 
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index 0049630532..db73f8ef59 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -21,8 +21,10 @@
 #include "access/transam.h"
 #include "access/xloginsert.h"
 #include "catalog/storage_xlog.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
@@ -160,6 +162,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
 				bds->stats->tuples_removed += 1;
 				deletable[i] = true;
 				nDeletable++;
+				pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, bds->stats->tuples_removed);
 			}
 			else
 			{
@@ -430,6 +433,7 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
 				bds->stats->tuples_removed += 1;
 				toDelete[xlrec.nDelete] = i;
 				xlrec.nDelete++;
+				pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, bds->stats->tuples_removed);
 			}
 			else
 			{
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 35ea25026f..88375aad0a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1129,7 +1129,23 @@ CREATE VIEW pg_stat_progress_vacuum AS
         S.param6 AS max_dead_tuples, S.param7 AS num_dead_tuples,
         S.param8 AS indexes_total, S.param9 AS indexes_processed
     FROM pg_stat_get_progress_info('VACUUM') AS S
-        LEFT JOIN pg_database D ON S.datid = D.oid;
+        LEFT JOIN pg_database D ON S.datid = D.oid
+    WHERE S.pid = S.param12;
+
+CREATE VIEW pg_stat_progress_vacuum_index AS  -- one row per process currently vacuuming or cleaning up an index
+    SELECT
+        S.pid AS pid,
+        S.datid AS datid,
+        D.datname AS datname,
+        S.param10 AS indexrelid,  -- PROGRESS_VACUUM_INDEXRELID (progress.h slot 9; paramN is slot + 1)
+        S.param12 AS leader_pid,  -- PROGRESS_VACUUM_LEADER_PID (slot 11); a non-worker reports its own pid
+        CASE S.param1 WHEN 2 THEN 'vacuuming indexes'  -- presumably PROGRESS_VACUUM_PHASE_VACUUM_INDEX; confirm in progress.h
+                      WHEN 4 THEN 'cleaning up indexes'  -- presumably PROGRESS_VACUUM_PHASE_INDEX_CLEANUP; confirm in progress.h
+                      END AS phase,
+        S.param11 AS tuples_removed  -- PROGRESS_VACUUM_TUPLES_REMOVED (slot 10)
+    FROM pg_stat_get_progress_info('VACUUM') AS S
+        LEFT JOIN pg_database D ON S.datid = D.oid
+    WHERE S.param1 IN (2, 4) AND S.param10 > 0;  -- indexrelid is reset to 0 once an index is finished, hiding idle rows
 
 CREATE VIEW pg_stat_progress_cluster AS
     SELECT
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index b491728425..a8a1f221e5 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -845,13 +845,18 @@ parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
 	pvs->indname = pstrdup(RelationGetRelationName(indrel));
 	pvs->status = indstats->status;
 
+	/* Advertise the index we are cleaning or vacuuming */
+	pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, RelationGetRelid(indrel));
+
 	switch (indstats->status)
 	{
 		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
+			pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
 			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
 			pgstat_progress_update_param_parallel(pvs->shared->leader_pid, PROGRESS_VACUUM_INDEXES_COMPLETED, 1);
 			break;
 		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
+			pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
 			istat_res = vac_cleanup_one_index(&ivinfo, istat);
 			pgstat_progress_update_param_parallel(pvs->shared->leader_pid, PROGRESS_VACUUM_INDEXES_COMPLETED, 1);
 			break;
@@ -888,6 +893,10 @@ parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
 	 */
 	indstats->status = PARALLEL_INDVAC_STATUS_COMPLETED;
 
+	/* Advertise we are no longer vacuuming/cleaning an index */
+	pgstat_progress_update_param(PROGRESS_VACUUM_INDEXRELID, 0);
+	pgstat_progress_update_param(PROGRESS_VACUUM_TUPLES_REMOVED, 0);
+
 	/* Reset error traceback information */
 	pvs->status = PARALLEL_INDVAC_STATUS_COMPLETED;
 	pfree(pvs->indname);
@@ -972,6 +981,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	 * workers.
 	 */
 	rel = table_open(shared->relid, ShareUpdateExclusiveLock);
+	pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM, RelationGetRelid(rel));
+	pgstat_progress_update_param(PROGRESS_VACUUM_LEADER_PID, shared->leader_pid);
 
 	/*
 	 * Open all indexes. indrels are sorted in order by OID, which should be
@@ -1042,6 +1053,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 
 	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
 	table_close(rel, ShareUpdateExclusiveLock);
+	pgstat_progress_end_command();
 	FreeAccessStrategy(pvs.bstrategy);
 }
 
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index cd3122c344..879691b37d 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -27,6 +27,9 @@
 #define PROGRESS_VACUUM_NUM_DEAD_TUPLES			6
 #define PROGRESS_VACUUM_TOTAL_INDEXES			7
 #define PROGRESS_VACUUM_INDEXES_COMPLETED		8
+#define PROGRESS_VACUUM_INDEXRELID				9
+#define PROGRESS_VACUUM_TUPLES_REMOVED			10
+#define PROGRESS_VACUUM_LEADER_PID				11
 
 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */
 #define PROGRESS_VACUUM_PHASE_SCAN_HEAP			1
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index d70a176514..d2c66bf034 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2006,7 +2006,22 @@ pg_stat_progress_vacuum| SELECT s.pid,
     s.param8 AS indexes_total,
     s.param9 AS indexes_processed
    FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
-     LEFT JOIN pg_database d ON ((s.datid = d.oid)));
+     LEFT JOIN pg_database d ON ((s.datid = d.oid)))
+  WHERE (s.pid = s.param12);
+pg_stat_progress_vacuum_index| SELECT s.pid,
+    s.datid,
+    d.datname,
+    s.param10 AS indexrelid,
+    s.param12 AS leader_pid,
+        CASE s.param1
+            WHEN 2 THEN 'vacuuming indexes'::text
+            WHEN 4 THEN 'cleaning up indexes'::text
+            ELSE NULL::text
+        END AS phase,
+    s.param11 AS tuples_removed
+   FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
+     LEFT JOIN pg_database d ON ((s.datid = d.oid)))
+  WHERE ((s.param1 = ANY (ARRAY[(2)::bigint, (4)::bigint])) AND (s.param10 > 0));
 pg_stat_replication| SELECT s.pid,
     s.usesysid,
     u.rolname AS usename,
-- 
2.32.0

