On Fri, Dec 03, 2021 at 10:16:24AM +0900, Michael Paquier wrote: > On Thu, Sep 23, 2021 at 06:56:26PM -0500, Justin Pryzby wrote: > > On Thu, Sep 23, 2021 at 08:18:41PM +0200, Matthias van de Meent wrote: > >> Note: The following review is based on the assumption that this v11 > >> revision was meant to contain only one patch. I put this up as a note, > >> because it seemed quite limited when compared to earlier versions of > >> the patchset. > > > > Alvaro's critique was that the patchset was too complicated for what was > > claimed to be a marginal feature. My response was to rearrange the > > patchset to > > its minimal form, supporting CLUSTER without marking the index as clustered. > > > > My goal is to present a minimal patch and avoid any nonessential complexity. > > FWIW, my opinion on the matter is similar to Alvaro's, and an extra > read of the patch gives me the same impression. Let's see if others > have an opinion on the matter.
You and Alvaro thought the patch was too complicated for its value, so I reduced the scope to its essential form. CLUSTER was claimed to be of marginal utility, since the table is locked. But locking one partition at a time would be less disruptive than locking an equivalent non-partitioned table. There's only about a dozen, other remaining restrictions/limitations on partitioned tables, (AMs, triggers, identity, generated, exclusion, CIC, FREEZE). Since it's supported to VACUUM (including VACUUM FULL) and REINDEX a partitioned table, I'm still suprised there's much hesitation to support CLUSTER (which is used by vacuum full). > > Thanks for looking. I'm going to see about updating comments based on > > corresponding parts of vacuum and on this message itself. > > It doesn't feel right to just discard the patch at this stage, and it > needs an update, so I have moved it to the next CF for now, waiting on > author. If this does not really move on, my suggestion is to discard > the patch at the end of next CF, aka 2022-01. This includes minor updates based on Mathias review (commit message and test case). -- Justin
>From cbeb98a016a05738007c98ef0a3828bdedf83d24 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 7 Jun 2020 16:58:42 -0500 Subject: [PATCH v11] Implement CLUSTER of partitioned table.. VACUUM (including vacuum full) has recursed into partitioned tables since partitioning were introduced in v10 (3c3bb9933). See expand_vacuum_rel(). For VACUUM FULL, vacuum_rel() calls cluster_rel() for each partition. This patch is to make cluster() do all the same stuff before calling cluster_rel(). For now, partitioned indexes cannot be marked clustered, so clustering requires specification of a partitioned index on the partitioned table. See also a556549d7 and 19de0ab23. --- doc/src/sgml/ref/cluster.sgml | 6 + src/backend/commands/cluster.c | 177 ++++++++++++++++++++------ src/bin/psql/tab-complete.c | 1 + src/include/commands/cluster.h | 1 + src/test/regress/expected/cluster.out | 45 ++++++- src/test/regress/sql/cluster.sql | 21 ++- 6 files changed, 204 insertions(+), 47 deletions(-) diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml index 86f5fdc469b..b3463ae5c46 100644 --- a/doc/src/sgml/ref/cluster.sgml +++ b/doc/src/sgml/ref/cluster.sgml @@ -196,6 +196,12 @@ CLUSTER [VERBOSE] in the <structname>pg_stat_progress_cluster</structname> view. See <xref linkend="cluster-progress-reporting"/> for details. </para> + + <para> + Clustering a partitioned table clusters each of its partitions using the + partition of the specified partitioned index (which must be specified). + </para> + </refsect1> <refsect1> diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 61853e6dec4..196c7f7eeb2 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -32,7 +32,9 @@ #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" #include "catalog/toasting.h" #include "commands/cluster.h" #include "commands/defrem.h" @@ -73,6 +75,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); +static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, + Oid indexOid); +static void cluster_multiple_rels(List *rvs, int options); /*--------------------------------------------------------------------------- @@ -131,6 +136,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) Relation rel; /* Find, lock, and check permissions on the table */ + /* Obtain AEL now to avoid lock-upgrade hazard in the single-transaction case */ tableOid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, 0, @@ -146,14 +152,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster temporary tables of other sessions"))); - /* - * Reject clustering a partitioned table. - */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot cluster a partitioned table"))); - if (stmt->indexname == NULL) { ListCell *index; @@ -188,11 +186,50 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) stmt->indexname, stmt->relation->relname))); } - /* close relation, keep lock till commit */ - table_close(rel, NoLock); + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + /* close relation, keep lock till commit */ + table_close(rel, NoLock); - /* Do the job. */ - cluster_rel(tableOid, indexOid, ¶ms); + /* Do the job. */ + cluster_rel(tableOid, indexOid, ¶ms); + } + else + { + List *rvs; + MemoryContext cluster_context; + + /* + * Expand partitioned relations for CLUSTER (the corresponding + * thing for VACUUM FULL happens in and around expand_vacuum_rel() + */ + + /* Refuse to hold strong locks in a user transaction */ + PreventInTransactionBlock(isTopLevel, "CLUSTER"); + + cluster_context = AllocSetContextCreate(PortalContext, + "Cluster", + ALLOCSET_DEFAULT_SIZES); + + rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid); + + /* + * For now, partitioned indexes are not actually marked clustered. + */ + check_index_is_clusterable(rel, indexOid, true, AccessShareLock); + + /* close relation, releasing lock on parent table */ + table_close(rel, AccessExclusiveLock); + + /* Do the job. */ + cluster_multiple_rels(rvs, params.options); + + /* Start a new transaction for the cleanup work. */ + StartTransactionCommand(); + + /* Clean up working storage */ + MemoryContextDelete(cluster_context); + } } else { @@ -202,7 +239,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) */ MemoryContext cluster_context; List *rvs; - ListCell *rv; /* * We cannot run this form of CLUSTER inside a user transaction block; @@ -225,28 +261,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) * cluster_context. */ rvs = get_tables_to_cluster(cluster_context); - - /* Commit to get out of starting transaction */ - PopActiveSnapshot(); - CommitTransactionCommand(); - - /* Ok, now that we've got them all, cluster them one by one */ - foreach(rv, rvs) - { - RelToCluster *rvtc = (RelToCluster *) lfirst(rv); - ClusterParams cluster_params = params; - - /* Start a new transaction for each relation. */ - StartTransactionCommand(); - /* functions in indexes may want a snapshot set */ - PushActiveSnapshot(GetTransactionSnapshot()); - /* Do the job. */ - cluster_params.options |= CLUOPT_RECHECK; - cluster_rel(rvtc->tableOid, rvtc->indexOid, - &cluster_params); - PopActiveSnapshot(); - CommitTransactionCommand(); - } + cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED); /* Start a new transaction for the cleanup work. */ StartTransactionCommand(); @@ -327,10 +342,11 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) /* * Silently skip a temp table for a remote session. Only doing this * check in the "recheck" case is appropriate (which currently means - * somebody is executing a database-wide CLUSTER), because there is - * another check in cluster() which will stop any attempt to cluster - * remote temp tables by name. There is another check in cluster_rel - * which is redundant, but we leave it for extra safety. + * somebody is executing a database-wide CLUSTER or on a partitioned + * table), because there is another check in cluster() which will stop + * any attempt to cluster remote temp tables by name. There is another + * check in cluster_rel which is redundant, but we leave it for extra + * safety. */ if (RELATION_IS_OTHER_TEMP(OldHeap)) { @@ -352,9 +368,10 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) } /* - * Check that the index is still the one with indisclustered set. + * Check that the index is still the one with indisclustered set, if needed. */ - if (!get_index_isclustered(indexOid)) + if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 && + !get_index_isclustered(indexOid)) { relation_close(OldHeap, AccessExclusiveLock); pgstat_progress_end_command(); @@ -415,6 +432,9 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params) return; } + // Should silently skip this rather than assert ? + Assert(RELKIND_HAS_STORAGE(OldHeap->rd_rel->relkind)); + /* * All predicate locks on the tuples or pages are about to be made * invalid, because we move tuples around. Promote them to relation @@ -585,8 +605,8 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) TransactionId frozenXid; MultiXactId cutoffMulti; - /* Mark the correct index as clustered */ if (OidIsValid(indexOid)) + /* Mark the correct index as clustered */ mark_index_clustered(OldHeap, indexOid, true); /* Remember info about rel before closing OldHeap */ @@ -1604,3 +1624,76 @@ get_tables_to_cluster(MemoryContext cluster_context) return rvs; } + +/* + * Return a List of tables and their associated indexes, where each index is a + * partition of the given index + * We're called with a lock held on the parent table. + */ +static List * +get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid) +{ + List *inhoids; + ListCell *lc; + List *rvs = NIL; + MemoryContext old_context; + + /* Do not lock the children until they're processed. */ + inhoids = find_all_inheritors(indexOid, NoLock, NULL); + + /* + * We have to build the list in a different memory context so it will + * survive the cross-transaction processing + */ + old_context = MemoryContextSwitchTo(cluster_context); + + foreach(lc, inhoids) + { + Oid indexrelid = lfirst_oid(lc); + Oid relid = IndexGetRelation(indexrelid, false); + RelToCluster *rvtc; + + if (!RELKIND_HAS_STORAGE(get_rel_relkind(indexrelid))) + continue; + + rvtc = (RelToCluster *) palloc(sizeof(RelToCluster)); + rvtc->tableOid = relid; + rvtc->indexOid = indexrelid; + rvs = lappend(rvs, rvtc); + } + + MemoryContextSwitchTo(old_context); + return rvs; +} + +/* Cluster each relation in a separate transaction */ +static void +cluster_multiple_rels(List *rvs, int options) +{ + ListCell *lc; + + /* Commit to get out of starting transaction */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* Ok, now that we've got them all, cluster them one by one */ + foreach(lc, rvs) + { + RelToCluster *rvtc = (RelToCluster *) lfirst(lc); + ClusterParams cluster_params = { .options = options, }; + + /* Start a new transaction for each relation. */ + StartTransactionCommand(); + + /* functions in indexes may want a snapshot set */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Do the job. */ + cluster_params.options |= CLUOPT_RECHECK; + cluster_rel(rvtc->tableOid, rvtc->indexOid, + &cluster_params); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } +} diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 39be6f556a8..9944f21f71c 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -599,6 +599,7 @@ static const SchemaQuery Query_for_list_of_clusterables = { .catname = "pg_catalog.pg_class c", .selcondition = "c.relkind IN (" CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_PARTITIONED_TABLE) ", " CppAsString2(RELKIND_MATVIEW) ")", .viscondition = "pg_catalog.pg_table_is_visible(c.oid)", .namespace = "c.relnamespace", diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h index 3db375d7cc7..0605b053b61 100644 --- a/src/include/commands/cluster.h +++ b/src/include/commands/cluster.h @@ -22,6 +22,7 @@ /* flag bits for ClusterParams->flags */ #define CLUOPT_RECHECK 0x01 /* recheck relation state */ #define CLUOPT_VERBOSE 0x02 /* print progress info */ +#define CLUOPT_RECHECK_ISCLUSTERED 0x04 /* recheck relation state for indisclustered */ /* options for CLUSTER */ typedef struct ClusterParams diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index e46a66952f0..3f2758d13f6 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -444,13 +444,52 @@ DROP TABLE clustertest; CREATE TABLE clustertest (f1 int PRIMARY KEY); CLUSTER clustertest USING clustertest_pkey; CLUSTER clustertest; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +ERROR: cannot cluster on invalid index "clstrpart_only_idx" +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +CLUSTER clstrpart USING clstrpart_idx; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C"; + relname | level | relkind | ?column? +-------------+-------+---------+---------- + clstrpart | 0 | p | t + clstrpart1 | 1 | p | t + clstrpart11 | 2 | r | f + clstrpart12 | 2 | p | t + clstrpart2 | 1 | r | f + clstrpart3 | 1 | p | t + clstrpart33 | 2 | r | f +(7 rows) + +-- Partitioned indexes aren't and can't be marked un/clustered: +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 3 (Use \d+ to list them.) + +CLUSTER clstrpart; +ERROR: there is no previously clustered index for table "clstrpart" +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +ERROR: cannot mark index clustered in partitioned table ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; ERROR: cannot mark index clustered in partitioned table -CLUSTER clstrpart USING clstrpart_idx; -ERROR: cannot cluster a partitioned table DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index aee9cf83e04..74118993a82 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -202,11 +202,28 @@ CREATE TABLE clustertest (f1 int PRIMARY KEY); CLUSTER clustertest USING clustertest_pkey; CLUSTER clustertest; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); -ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; CLUSTER clstrpart USING clstrpart_idx; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C"; +-- Partitioned indexes aren't and can't be marked un/clustered: +\d clstrpart +CLUSTER clstrpart; +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting -- 2.17.1