Hello

Here's a patch that implements progress reporting for ANALYZE.


Thanks

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 572fea555bc1c9d3512f7806aed6b97a374199eb Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Fri, 21 Jun 2019 13:35:13 -0400
Subject: [PATCH v1] Report progress for ANALYZE

---
 doc/src/sgml/monitoring.sgml         | 129 +++++++++++++++++++++++++++
 src/backend/catalog/system_views.sql |  15 ++++
 src/backend/commands/analyze.c       |  55 +++++++++++-
 src/backend/utils/adt/pgstatfuncs.c  |   2 +
 src/backend/utils/misc/sampling.c    |   6 +-
 src/include/commands/progress.h      |  14 +++
 src/include/pgstat.h                 |   1 +
 src/include/utils/sampling.h         |   2 +-
 src/test/regress/expected/rules.out  |  17 ++++
 9 files changed, 237 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index bf72d0c303..a924ab3b06 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -360,6 +360,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_progress_analyze</structname><indexterm><primary>pg_stat_progress_analyze</primary></indexterm></entry>
+      <entry>One row for each backend (including autovacuum worker processes) running
+       <command>ANALYZE</command>, showing current progress.
+       See <xref linkend='analyze-progress-reporting'/>.
+      </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_progress_cluster</structname><indexterm><primary>pg_stat_progress_cluster</primary></indexterm></entry>
       <entry>One row for each backend running
@@ -3927,6 +3935,127 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
 
  </sect2>
 
+ <sect2 id="analyze-progress-reporting">
+  <title>ANALYZE Progress Reporting</title>
+
+  <para>
+   Whenever <command>ANALYZE</command> is running, the
+   <structname>pg_stat_progress_analyze</structname> view will contain a
+   row for ech backend that is currently running that command.  The tables
+   below describe the information that will be reported and provide
+   information about how to interpret it.
+  </para>
+
+  <table id="pg-stat-progress-analyze-view" xreflabel="pg_stat_progress_analyze">
+   <title><structname>pg_stat_progress_analyze</structname> View</title>
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+   <tbody>
+    <row>
+     <entry><structfield>pid</structfield></entry>
+     <entry><type>integer</type></entry>
+     <entry>Process ID of backend.</entry>
+    </row>
+     <row>
+      <entry><structfield>datid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry>OID of the database to which this backend is connected.</entry>
+     </row>
+     <row>
+      <entry><structfield>datname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry>Name of the database to which this backend is connected.</entry>
+     </row>
+     <row>
+      <entry><structfield>relid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry>OID of the table being analyzed.</entry>
+     </row>
+     <row>
+      <entry><structfield>include_children</structfield></entry>
+      <entry><type>boolean</type></entry>
+      <entry>Whether the current scan includes legacy inheritance children.</entry>
+     </row>
+     <row>
+      <entry><structfield>scanning_table</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry>
+       The table being scanned (differs from <literal>relid</literal>
+       only when processing partitions or inheritance children).
+      </entry>
+     </row>
+     <row>
+      <entry><structfield>phase</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry>Current processing phase. See <xref linkend="analyze-phases" /></entry>
+     </row>
+     <row>
+     <entry><structfield>heap_blks_total</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Total number of heap blocks to scan in the current table.
+     </entry>
+     </row>
+    <row>
+     <entry><structfield>heap_blks_scanned</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of heap blocks scanned.
+     </entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+
+  <table id="analyze-phases">
+   <title>ANALYZE phases</title>
+   <tgroup cols="2">
+    <thead>
+    <row>
+      <entry>Phase</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+   <tbody>
+    <row>
+     <entry><literal>initializing</literal></entry>
+     <entry>
+       The command is preparing to begin scanning the heap.  This phase is
+       expected to be very brief.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>scanning table</literal></entry>
+     <entry>
+       The command is currently scanning the table.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>analyzing sample</literal></entry>
+     <entry>
+       <command>ANALYZE</command> is currently extracting statistical data
+       from the sample obtained.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>analyzing sample (extended)</literal></entry>
+     <entry>
+       <command>ANALYZE</command> is currently extracting statistical data
+       for extended statistics from the sample obtained.
+     </entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+ </sect2>
+
  <sect2 id="cluster-progress-reporting">
   <title>CLUSTER Progress Reporting</title>
 
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ea4c85e395..e497f59790 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -964,6 +964,21 @@ CREATE VIEW pg_stat_progress_vacuum AS
     FROM pg_stat_get_progress_info('VACUUM') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
+CREATE VIEW pg_stat_progress_analyze AS
+    SELECT
+        S.pid AS pid, S.datid AS datid, D.datname AS datname,
+        CAST(S.relid AS oid) AS relid,
+        CAST(CAST(S.param2 AS int) AS boolean) AS include_children,
+        CAST(S.param3 AS oid) AS scanning_table,
+        CASE S.param1 WHEN 0 THEN 'initializing'
+                      WHEN 1 THEN 'scanning table'
+                      WHEN 2 THEN 'analyzing sample'
+                      WHEN 3 THEN 'analyzing sample (extended stats)'
+                      END AS phase,
+        S.param4 AS heap_blks_total, S.param5 AS heap_blks_scanned
+    FROM pg_stat_get_progress_info('ANALYZE') AS S
+        LEFT JOIN pg_database D ON S.datid = D.oid;
+
 CREATE VIEW pg_stat_progress_cluster AS
     SELECT
         S.pid AS pid,
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 6cb545c126..0a75549440 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -35,6 +35,7 @@
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_statistic_ext.h"
 #include "commands/dbcommands.h"
+#include "commands/progress.h"
 #include "commands/tablecmds.h"
 #include "commands/vacuum.h"
 #include "executor/executor.h"
@@ -251,6 +252,8 @@ analyze_rel(Oid relid, RangeVar *relation,
 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 	MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
 	LWLockRelease(ProcArrayLock);
+	pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
+								  RelationGetRelid(onerel));
 
 	/*
 	 * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
@@ -275,6 +278,8 @@ analyze_rel(Oid relid, RangeVar *relation,
 	 */
 	relation_close(onerel, NoLock);
 
+	pgstat_progress_end_command();
+
 	/*
 	 * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
 	 * because the vacuum flag is cleared by the end-of-xact code.
@@ -493,6 +498,18 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	/*
 	 * Acquire the sample rows
 	 */
+	{
+		const int   index[] = {
+			PROGRESS_ANALYZE_PHASE,
+			PROGRESS_ANALYZE_INH
+		};
+		const int64 val[] = {
+			PROGRESS_ANALYZE_PHASE_SCAN_TABLE,
+			inh
+		};
+
+		pgstat_progress_update_multi_param(2, index, val);
+	}
 	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
 	if (inh)
 		numrows = acquire_inherited_sample_rows(onerel, elevel,
@@ -512,7 +529,18 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	if (numrows > 0)
 	{
 		MemoryContext col_context,
-					old_context;
+					  old_context;
+		const int   index[] = {
+			PROGRESS_ANALYZE_PHASE,
+			PROGRESS_ANALYZE_TOTAL_BLOCKS,
+			PROGRESS_ANALYZE_BLOCKS_DONE
+		};
+		const int64 val[] = {
+			PROGRESS_ANALYZE_PHASE_ANALYSIS,
+			0, 0
+		};
+
+		pgstat_progress_update_multi_param(3, index, val);
 
 		col_context = AllocSetContextCreate(anl_context,
 											"Analyze Column",
@@ -574,10 +602,15 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		}
 
 		/* Build extended statistics (if there are any). */
+		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+									 PROGRESS_ANALYZE_PHASE_ANALYSIS_EXTENDED);
 		BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
 								   vacattrstats);
 	}
 
+	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+								 PROGRESS_ANALYZE_PHASE_COMPLETE);
+
 	/*
 	 * Update pages/tuples stats in pg_class ... but not if we're doing
 	 * inherited stats.
@@ -1016,6 +1049,8 @@ acquire_sample_rows(Relation onerel, int elevel,
 	ReservoirStateData rstate;
 	TupleTableSlot *slot;
 	TableScanDesc scan;
+	BlockNumber	nblocks;
+	double		blksdone = 0;
 
 	Assert(targrows > 0);
 
@@ -1025,7 +1060,20 @@ acquire_sample_rows(Relation onerel, int elevel,
 	OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM);
 
 	/* Prepare for sampling block numbers */
-	BlockSampler_Init(&bs, totalblocks, targrows, random());
+	nblocks = BlockSampler_Init(&bs, totalblocks, targrows, random());
+	{
+		const int   index[] = {
+			PROGRESS_ANALYZE_TOTAL_BLOCKS,
+			PROGRESS_ANALYZE_SCANREL
+		};
+		const int64 val[] = {
+			nblocks,
+			RelationGetRelid(onerel)
+		};
+
+		pgstat_progress_update_multi_param(2, index, val);
+	}
+
 	/* Prepare for sampling rows */
 	reservoir_init_selection_state(&rstate, targrows);
 
@@ -1086,6 +1134,9 @@ acquire_sample_rows(Relation onerel, int elevel,
 
 			samplerows += 1;
 		}
+
+		pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
+									 ++blksdone);
 	}
 
 	ExecDropSingleTupleTableSlot(slot);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 05240bfd14..98b01e54fa 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -469,6 +469,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
 	/* Translate command name into command type code. */
 	if (pg_strcasecmp(cmd, "VACUUM") == 0)
 		cmdtype = PROGRESS_COMMAND_VACUUM;
+	if (pg_strcasecmp(cmd, "ANALYZE") == 0)
+		cmdtype = PROGRESS_COMMAND_ANALYZE;
 	else if (pg_strcasecmp(cmd, "CLUSTER") == 0)
 		cmdtype = PROGRESS_COMMAND_CLUSTER;
 	else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0)
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index d2a1537979..f7daece5ee 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -32,8 +32,10 @@
  * Since we know the total number of blocks in advance, we can use the
  * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
  * algorithm.
+ *
+ * Returns the number of blocks that BlockSampler_Next will return.
  */
-void
+BlockNumber
 BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
 				  long randseed)
 {
@@ -48,6 +50,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
 	bs->m = 0;					/* blocks selected so far */
 
 	sampler_random_init_state(randseed, bs->randstate);
+
+	return Min(bs->n, bs->N);
 }
 
 bool
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index acd1313cb3..3007351922 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -34,6 +34,20 @@
 #define PROGRESS_VACUUM_PHASE_TRUNCATE			5
 #define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP		6
 
+/* Progress parameters for analyze */
+#define PROGRESS_ANALYZE_PHASE					0
+#define PROGRESS_ANALYZE_INH					1
+#define PROGRESS_ANALYZE_SCANREL				2
+#define PROGRESS_ANALYZE_TOTAL_BLOCKS			3
+#define PROGRESS_ANALYZE_BLOCKS_DONE			4
+
+/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */
+#define PROGRESS_ANALYZE_PHASE_SCAN_TABLE		1
+#define PROGRESS_ANALYZE_PHASE_ANALYSIS			2
+#define PROGRESS_ANALYZE_PHASE_ANALYSIS_EXTENDED 3
+#define PROGRESS_ANALYZE_PHASE_COMPLETE			4
+
+
 /* Progress parameters for cluster */
 #define PROGRESS_CLUSTER_COMMAND				0
 #define PROGRESS_CLUSTER_PHASE					1
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 0a3ad3a188..9344654b69 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -956,6 +956,7 @@ typedef enum ProgressCommandType
 {
 	PROGRESS_COMMAND_INVALID,
 	PROGRESS_COMMAND_VACUUM,
+	PROGRESS_COMMAND_ANALYZE,
 	PROGRESS_COMMAND_CLUSTER,
 	PROGRESS_COMMAND_CREATE_INDEX
 } ProgressCommandType;
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index 541b507fb5..76d31dc126 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -37,7 +37,7 @@ typedef struct
 
 typedef BlockSamplerData *BlockSampler;
 
-extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
+extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
 							  int samplesize, long randseed);
 extern bool BlockSampler_HasMore(BlockSampler bs);
 extern BlockNumber BlockSampler_Next(BlockSampler bs);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 210e9cd146..dd3191563d 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1846,6 +1846,23 @@ pg_stat_gssapi| SELECT s.pid,
     s.gss_princ AS principal,
     s.gss_enc AS encrypted
    FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, sslcompression, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc);
+pg_stat_progress_analyze| SELECT s.pid,
+    s.datid,
+    d.datname,
+    s.relid,
+    ((s.param2)::integer)::boolean AS include_children,
+    (s.param3)::oid AS scanning_table,
+        CASE s.param1
+            WHEN 0 THEN 'initializing'::text
+            WHEN 1 THEN 'scanning table'::text
+            WHEN 2 THEN 'analyzing sample'::text
+            WHEN 3 THEN 'analyzing sample (extended stats)'::text
+            ELSE NULL::text
+        END AS phase,
+    s.param4 AS heap_blks_total,
+    s.param5 AS heap_blks_scanned
+   FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
+     LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_cluster| SELECT s.pid,
     s.datid,
     d.datname,
-- 
2.17.1

Reply via email to