On 2019/03/05 17:56, Tatsuro Yamada wrote:
Hi Robert!
On 2019/03/05 11:35, Robert Haas wrote:
On Mon, Mar 4, 2019 at 5:38 AM Tatsuro Yamada
<yamada.tats...@lab.ntt.co.jp> wrote:
=== Current design ===
CLUSTER command uses Index Scan or Seq Scan when scanning the heap.
Depending on which one is chosen, the command will proceed in the
following sequence of phases:
* Scan method: Seq Scan
0. initializing (*2)
1. seq scanning heap (*1)
3. sorting tuples (*2)
4. writing new heap (*1)
5. swapping relation files (*2)
6. rebuilding index (*2)
7. performing final cleanup (*2)
* Scan method: Index Scan
0. initializing (*2)
2. index scanning heap (*1)
5. swapping relation files (*2)
6. rebuilding index (*2)
7. performing final cleanup (*2)
VACUUM FULL command will proceed in the following sequence of phases:
1. seq scanning heap (*1)
5. swapping relation files (*2)
6. rebuilding index (*2)
7. performing final cleanup (*2)
(*1): increasing the value in heap_tuples_scanned column
(*2): only shows the phase in the phase column
All of that sounds good.
The view provides the information of CLUSTER command progress details as follows
# \d pg_stat_progress_cluster
View "pg_catalog.pg_stat_progress_cluster"
Column | Type | Collation | Nullable | Default
---------------------------+---------+-----------+----------+---------
pid | integer | | |
datid | oid | | |
datname | name | | |
relid | oid | | |
command | text | | |
phase | text | | |
cluster_index_relid | bigint | | |
heap_tuples_scanned | bigint | | |
heap_tuples_vacuumed | bigint | | |
Still not sure if we need heap_tuples_vacuumed. We could try to
report heap_blks_scanned and heap_blks_total like we do for VACUUM, if
we're using a Seq Scan.
I have no strong opinion to add heap_tuples_vacuumed, so I'll remove that in
next patch.
Regarding heap_blks_scanned and heap_blks_total, I suppose that it is able to
get those from initscan(). I'll investigate it more.
cluster.c
copy_heap_data()
heap_beginscan()
heap_beginscan_internal()
initscan()
=== Discussion points ===
- Progress counter for "3. sorting tuples" phase
- Should we add pgstat_progress_update_param() in tuplesort.c like a
"trace_sort"?
Thanks to Peter Geoghegan for the useful advice!
How would we avoid an abstraction violation?
Hmm... What do you mean an abstraction violation?
If it is difficult to solve, I'd not like to add the progress counter for the
sorting tuples.
- Progress counter for "6. rebuilding index" phase
- Should we add "index_vacuum_count" in the view like a vacuum progress
monitor?
If yes, I'll add pgstat_progress_update_param() to reindex_relation()
of index.c.
However, I'm not sure whether it is okay or not.
Doesn't seem unreasonable to me.
I see, I'll add it later.
Attached file is revised and WIP patch including:
- Remove heap_tuples_vacuumed
- Add heap_blks_scanned and heap_blks_total
- Add index_vacuum_count
I tried to "add heap_blks_scanned and heap_blks_total" columns and I realized
that
"heap_tuples_scanned" column is suitable as a counter when a scan method is
both index-scan and seq-scan because CLUSTER is on a tuple basis.
Regards,
Tatsuro Yamada
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index d16c3d0ea5..a88e8d2492 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -51,6 +51,7 @@
#include "catalog/storage.h"
#include "commands/tablecmds.h"
#include "commands/event_trigger.h"
+#include "commands/progress.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "miscadmin.h"
@@ -58,6 +59,7 @@
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "parser/parser.h"
+#include "pgstat.h"
#include "rewrite/rewriteManip.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
@@ -3850,6 +3852,7 @@ reindex_relation(Oid relid, int flags, int options)
List *indexIds;
bool is_pg_class;
bool result;
+ int i;
/*
* Open and lock the relation. ShareLock is sufficient since we only need
@@ -3937,6 +3940,7 @@ reindex_relation(Oid relid, int flags, int options)
/* Reindex all the indexes. */
doneIndexes = NIL;
+ i = 1;
foreach(indexId, indexIds)
{
Oid indexOid = lfirst_oid(indexId);
@@ -3954,6 +3958,11 @@ reindex_relation(Oid relid, int flags, int options)
if (is_pg_class)
doneIndexes = lappend_oid(doneIndexes, indexOid);
+
+ /* Set index rebuild count */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
+ i);
+ i++;
}
}
PG_CATCH();
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 3e229c693c..88f3940fa5 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -906,6 +906,32 @@ CREATE VIEW pg_stat_progress_vacuum AS
FROM pg_stat_get_progress_info('VACUUM') AS S
LEFT JOIN pg_database D ON S.datid = D.oid;
+CREATE VIEW pg_stat_progress_cluster AS
+ SELECT
+ S.pid AS pid,
+ S.datid AS datid,
+ D.datname AS datname,
+ S.relid AS relid,
+ CASE S.param1 WHEN 1 THEN 'CLUSTER'
+ WHEN 2 THEN 'VACUUM FULL'
+ END AS command,
+ CASE S.param2 WHEN 0 THEN 'initializing'
+ WHEN 1 THEN 'seq scanning heap'
+ WHEN 2 THEN 'index scanning heap'
+ WHEN 3 THEN 'sorting tuples'
+ WHEN 4 THEN 'writing new heap'
+ WHEN 5 THEN 'swapping relation files'
+ WHEN 6 THEN 'rebuilding index'
+ WHEN 7 THEN 'performing final cleanup'
+ END AS phase,
+ S.param3 AS cluster_index_relid,
+ S.param4 AS heap_tuples_scanned,
+ S.param5 AS heap_blks_total,
+ S.param6 AS heap_blks_scanned,
+ S.param7 AS index_rebuild_count
+ FROM pg_stat_get_progress_info('CLUSTER') AS S
+ LEFT JOIN pg_database D ON S.datid = D.oid;
+
CREATE VIEW pg_user_mappings AS
SELECT
U.oid AS umid,
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index a74af4c171..73b5e73b04 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -35,10 +35,12 @@
#include "catalog/objectaccess.h"
#include "catalog/toasting.h"
#include "commands/cluster.h"
+#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "optimizer/optimizer.h"
+#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
@@ -275,6 +277,8 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
/* Check for user-requested abort. */
CHECK_FOR_INTERRUPTS();
+ pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
+
/*
* We grab exclusive access to the target rel and index for the duration
* of the transaction. (This is redundant for the single-transaction
@@ -385,6 +389,18 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
*/
CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
+ /* Set command to column */
+ if (OidIsValid(indexOid))
+ {
+ pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+ PROGRESS_CLUSTER_COMMAND_CLUSTER);
+ }
+ else
+ {
+ pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
+ PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
+ }
+
/* Check heap and index are valid to cluster on */
if (OidIsValid(indexOid))
check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
@@ -415,6 +431,8 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
rebuild_relation(OldHeap, indexOid, verbose);
/* NB: rebuild_relation does table_close() on OldHeap */
+
+ pgstat_progress_end_command();
}
/*
@@ -923,14 +941,33 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
*/
if (OldIndex != NULL && !use_sort)
{
+ const int ci_index[] = {
+ PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_INDEX_RELID
+ };
+ int64 ci_val[2];
+
+ /* Set phase and OIDOldIndex to columns */
+ ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
+ ci_val[1] = OIDOldIndex;
+ pgstat_progress_update_multi_param(2, ci_index, ci_val);
+
heapScan = NULL;
indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
index_rescan(indexScan, NULL, 0, NULL, 0);
}
else
{
+ /* Set phase */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
+
heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
indexScan = NULL;
+
+ /* Set total heap blocks */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
+ heapScan->rs_nblocks);
}
/* Log what we're doing */
@@ -984,6 +1021,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
break;
buf = heapScan->rs_cbuf;
+
+ /* Set heap blocks scanned */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
+ heapScan->rs_cblock);
}
LockBuffer(buf, BUFFER_LOCK_SHARE);
@@ -1049,6 +1090,13 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
tups_vacuumed += 1;
tups_recently_dead -= 1;
}
+
+ /* set tups_vacuumed column for VACUUM FULL */
+ /*
+ pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_VACUUMED,
+ tups_vacuumed);
+ */
+
continue;
}
@@ -1060,6 +1108,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
oldTupDesc, newTupDesc,
values, isnull,
rwstate);
+
+ /* Regardless of index scan or seq scan, update tuples_scanned column */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
+ num_tuples);
}
if (indexScan != NULL)
@@ -1073,8 +1125,29 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
*/
if (tuplesort != NULL)
{
+ double num_tuples = 0;
+ const int cp_index[] = {
+ PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
+ PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
+ PROGRESS_CLUSTER_HEAP_BLKS_SCANNED
+ };
+ int64 cp_val[4];
+
+ /* Report that we are now sorting tuples */
+ cp_val[0] = PROGRESS_CLUSTER_PHASE_SORT_TUPLES;
+ cp_val[1] = num_tuples;
+ cp_val[2] = 0;
+ cp_val[3] = 0;
+ pgstat_progress_update_multi_param(4, cp_index, cp_val);
+
tuplesort_performsort(tuplesort);
+ /* Report that we are now writing new heap */
+ cp_val[0] = PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP;
+ cp_val[1] = num_tuples;
+ pgstat_progress_update_multi_param(2, cp_index, cp_val);
+
for (;;)
{
HeapTuple tuple;
@@ -1085,10 +1158,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
if (tuple == NULL)
break;
+ num_tuples += 1;
reform_and_rewrite_tuple(tuple,
oldTupDesc, newTupDesc,
values, isnull,
rwstate);
+ /* Report num_tuples */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
+ num_tuples);
}
tuplesort_end(tuplesort);
@@ -1526,6 +1603,16 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
Oid mapped_tables[4];
int reindex_flags;
int i;
+ const int cp_index[] = {
+ PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED
+ };
+ int64 cp_val[2];
+
+ /* Report that we are now swapping relation files */
+ cp_val[0] = PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES;
+ cp_val[1] = 0;
+ pgstat_progress_update_multi_param(2, cp_index, cp_val);
/* Zero out possible results from swapped_relation_files */
memset(mapped_tables, 0, sizeof(mapped_tables));
@@ -1561,6 +1648,11 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
* because the new heap won't contain any HOT chains at all, let alone
* broken ones, so it can't be necessary to set indcheckxmin.
*/
+
+ /* Report that we are now reindexing relations */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
+
reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
if (check_constraints)
reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
@@ -1576,6 +1668,10 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
reindex_relation(OIDOldHeap, reindex_flags, 0);
+ /* Report that we are now doing clean up */
+ pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
+ PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
+
/*
* If the relation being rebuild is pg_class, swap_relation_files()
* couldn't update pg_class's own pg_class entry (check comments in
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 69f7265779..37ff3dbff6 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -468,6 +468,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
/* Translate command name into command type code. */
if (pg_strcasecmp(cmd, "VACUUM") == 0)
cmdtype = PROGRESS_COMMAND_VACUUM;
+ else if(pg_strcasecmp(cmd, "CLUSTER") == 0)
+ cmdtype = PROGRESS_COMMAND_CLUSTER;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index 9858b36a38..0f637fe4e7 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -34,4 +34,26 @@
#define PROGRESS_VACUUM_PHASE_TRUNCATE 5
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP 6
+/* Progress parameters for cluster */
+#define PROGRESS_CLUSTER_COMMAND 0
+#define PROGRESS_CLUSTER_PHASE 1
+#define PROGRESS_CLUSTER_INDEX_RELID 2
+#define PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED 3
+#define PROGRESS_CLUSTER_TOTAL_HEAP_BLKS 4
+#define PROGRESS_CLUSTER_HEAP_BLKS_SCANNED 5
+#define PROGRESS_CLUSTER_INDEX_REBUILD_COUNT 6
+
+/* Phases of cluster (as dvertised via PROGRESS_CLUSTER_PHASE) */
+#define PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP 1
+#define PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP 2
+#define PROGRESS_CLUSTER_PHASE_SORT_TUPLES 3
+#define PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP 4
+#define PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES 5
+#define PROGRESS_CLUSTER_PHASE_REBUILD_INDEX 6
+#define PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP 7
+
+/* Commands of PROGRESS_CLUSTER */
+#define PROGRESS_CLUSTER_COMMAND_CLUSTER 1
+#define PROGRESS_CLUSTER_COMMAND_VACUUM_FULL 2
+
#endif
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 88a75fb798..745685c8a6 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -934,7 +934,8 @@ typedef enum
typedef enum ProgressCommandType
{
PROGRESS_COMMAND_INVALID,
- PROGRESS_COMMAND_VACUUM
+ PROGRESS_COMMAND_VACUUM,
+ PROGRESS_COMMAND_CLUSTER
} ProgressCommandType;
#define PGSTAT_NUM_PROGRESS_PARAM 10
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 98f417cb57..f72fdd4d92 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1829,6 +1829,33 @@ pg_stat_database_conflicts| SELECT d.oid AS datid,
pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin,
pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock
FROM pg_database d;
+pg_stat_progress_cluster| SELECT s.pid,
+ s.datid,
+ d.datname,
+ s.relid,
+ CASE s.param1
+ WHEN 1 THEN 'CLUSTER'::text
+ WHEN 2 THEN 'VACUUM FULL'::text
+ ELSE NULL::text
+ END AS command,
+ CASE s.param2
+ WHEN 0 THEN 'initializing'::text
+ WHEN 1 THEN 'seq scanning heap'::text
+ WHEN 2 THEN 'index scanning heap'::text
+ WHEN 3 THEN 'sorting tuples'::text
+ WHEN 4 THEN 'writing new heap'::text
+ WHEN 5 THEN 'swapping relation files'::text
+ WHEN 6 THEN 'rebuilding index'::text
+ WHEN 7 THEN 'performing final cleanup'::text
+ ELSE NULL::text
+ END AS phase,
+ s.param4 AS cluster_index_relid,
+ s.param5 AS heap_tuples_scanned,
+ s.param6 AS heap_blks_total,
+ s.param7 AS heap_blks_scanned,
+ s.param8 AS index_rebuild_count
+ FROM (pg_stat_get_progress_info('CLUSTER'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10)
+ LEFT JOIN pg_database d ON ((s.datid = d.oid)));
pg_stat_progress_vacuum| SELECT s.pid,
s.datid,
d.datname,