@cfbot: rebased On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote: > I'm attaching a counter-proposal to your catalog change, which preserves > indisclustered on children of clustered, partitioned indexes, and invalidates > indisclustered when attaching unclustered indexes.
..and now propagates CLUSTER ON to child indexes. I left this as separate patches to show what I mean and what's new while we discuss it. -- Justin
>From 521b91b2a5555f5688e88145714db1e990b803ea Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 7 Jun 2020 16:58:42 -0500 Subject: [PATCH v3 1/3] Implement CLUSTER of partitioned table.. This requires either specification of a partitioned index on which to cluster, or that an partitioned index was previously set clustered. --- doc/src/sgml/ref/cluster.sgml | 6 + src/backend/commands/cluster.c | 167 +++++++++++++++++++------- src/bin/psql/tab-complete.c | 1 + src/include/nodes/parsenodes.h | 5 +- src/test/regress/expected/cluster.out | 58 ++++++++- src/test/regress/sql/cluster.sql | 24 +++- 6 files changed, 208 insertions(+), 53 deletions(-) diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml index b9450e7366..0476cfff72 100644 --- a/doc/src/sgml/ref/cluster.sgml +++ b/doc/src/sgml/ref/cluster.sgml @@ -172,6 +172,12 @@ CLUSTER [VERBOSE] are periodically reclustered. </para> + <para> + Clustering a partitioned table clusters each of its partitions using the + index partition of the given partitioned index or (if not specified) the + partitioned index marked as clustered. + </para> + </refsect1> <refsect1> diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 04d12a7ece..391e018bbd 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -32,7 +32,9 @@ #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" #include "catalog/toasting.h" #include "commands/cluster.h" #include "commands/progress.h" @@ -72,6 +74,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); +static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, + Oid indexOid); +static void cluster_multiple_rels(List *rvs, int options); /*--------------------------------------------------------------------------- @@ -113,7 +118,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) AccessExclusiveLock, 0, RangeVarCallbackOwnsTable, NULL); - rel = table_open(tableOid, NoLock); + rel = table_open(tableOid, ShareUpdateExclusiveLock); /* * Reject clustering a remote temp table ... their local buffer @@ -124,14 +129,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster temporary tables of other sessions"))); - /* - * Reject clustering a partitioned table. - */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot cluster a partitioned table"))); - if (stmt->indexname == NULL) { ListCell *index; @@ -169,8 +166,32 @@ cluster(ClusterStmt *stmt, bool isTopLevel) /* close relation, keep lock till commit */ table_close(rel, NoLock); - /* Do the job. */ - cluster_rel(tableOid, indexOid, stmt->options); + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + /* Do the job. */ + cluster_rel(tableOid, indexOid, stmt->options); + } + else + { + List *rvs; + MemoryContext cluster_context; + + /* Refuse to hold strong locks in a user transaction */ + PreventInTransactionBlock(isTopLevel, "CLUSTER"); + + cluster_context = AllocSetContextCreate(PortalContext, + "Cluster", + ALLOCSET_DEFAULT_SIZES); + + rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid); + cluster_multiple_rels(rvs, stmt->options); + + /* Start a new transaction for the cleanup work. */ + StartTransactionCommand(); + + /* Clean up working storage */ + MemoryContextDelete(cluster_context); + } } else { @@ -180,7 +201,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel) */ MemoryContext cluster_context; List *rvs; - ListCell *rv; /* * We cannot run this form of CLUSTER inside a user transaction block; @@ -204,25 +224,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel) */ rvs = get_tables_to_cluster(cluster_context); - /* Commit to get out of starting transaction */ - PopActiveSnapshot(); - CommitTransactionCommand(); - - /* Ok, now that we've got them all, cluster them one by one */ - foreach(rv, rvs) - { - RelToCluster *rvtc = (RelToCluster *) lfirst(rv); - - /* Start a new transaction for each relation. */ - StartTransactionCommand(); - /* functions in indexes may want a snapshot set */ - PushActiveSnapshot(GetTransactionSnapshot()); - /* Do the job. */ - cluster_rel(rvtc->tableOid, rvtc->indexOid, - stmt->options | CLUOPT_RECHECK); - PopActiveSnapshot(); - CommitTransactionCommand(); - } + cluster_multiple_rels(rvs, stmt->options | CLUOPT_RECHECK_ISCLUSTERED); /* Start a new transaction for the cleanup work. */ StartTransactionCommand(); @@ -328,9 +330,10 @@ cluster_rel(Oid tableOid, Oid indexOid, int options) } /* - * Check that the index is still the one with indisclustered set. + * Check that the index is still the one with indisclustered set, if needed. */ - if (!get_index_isclustered(indexOid)) + if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 && + !get_index_isclustered(indexOid)) { relation_close(OldHeap, AccessExclusiveLock); pgstat_progress_end_command(); @@ -374,8 +377,13 @@ cluster_rel(Oid tableOid, Oid indexOid, int options) /* Check heap and index are valid to cluster on */ if (OidIsValid(indexOid)) + { check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock); + /* Mark the index as clustered */ + mark_index_clustered(OldHeap, indexOid, true); + } + /* * Quietly ignore the request if this is a materialized view which has not * been populated from its query. No harm is done because there is no data @@ -391,6 +399,14 @@ cluster_rel(Oid tableOid, Oid indexOid, int options) return; } + /* For a partitioned rel, we're done. */ + if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid))) + { + relation_close(OldHeap, AccessExclusiveLock); + pgstat_progress_end_command(); + return; + } + /* * All predicate locks on the tuples or pages are about to be made * invalid, because we move tuples around. Promote them to relation @@ -459,6 +475,9 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD * the worst consequence of following broken HOT chains would be that we * might put recently-dead tuples out-of-order in the new table, and there * is little harm in that.) + * + * This also refuses to cluster on an "incomplete" partitioned index + * created with "ON ONLY". */ if (!OldIndex->rd_index->indisvalid) ereport(ERROR, @@ -483,12 +502,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) Relation pg_index; ListCell *index; - /* Disallow applying to a partitioned table */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot mark index clustered in partitioned table"))); - /* * If the index is already marked clustered, no need to do anything. */ @@ -560,10 +573,6 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) TransactionId frozenXid; MultiXactId cutoffMulti; - /* Mark the correct index as clustered */ - if (OidIsValid(indexOid)) - mark_index_clustered(OldHeap, indexOid, true); - /* Remember info about rel before closing OldHeap */ relpersistence = OldHeap->rd_rel->relpersistence; is_system_catalog = IsSystemRelation(OldHeap); @@ -1557,3 +1566,75 @@ get_tables_to_cluster(MemoryContext cluster_context) return rvs; } + +/* + * Return a List of tables and associated index, where each index is a + * partition of the given index + */ +static List * +get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid) +{ + List *inhoids; + ListCell *lc; + List *rvs = NIL; + MemoryContext old_context; + + inhoids = find_all_inheritors(indexOid, NoLock, NULL); + + foreach(lc, inhoids) + { + Oid indexrelid = lfirst_oid(lc); + Oid relid = IndexGetRelation(indexrelid, false); + RelToCluster *rvtc; + + /* + * Partitioned rels are also processed by cluster_rel, to + * call check_index_is_clusterable() and mark_index_clustered(). + */ + + /* + * We have to build the list in a different memory context so it will + * survive the cross-transaction processing + */ + old_context = MemoryContextSwitchTo(cluster_context); + + rvtc = (RelToCluster *) palloc(sizeof(RelToCluster)); + rvtc->tableOid = relid; + rvtc->indexOid = indexrelid; + rvs = lappend(rvs, rvtc); + + MemoryContextSwitchTo(old_context); + } + + return rvs; +} + +/* Cluster each relation in a separate transaction */ +static void +cluster_multiple_rels(List *rvs, int options) +{ + ListCell *lc; + + /* Commit to get out of starting transaction */ + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* Ok, now that we've got them all, cluster them one by one */ + foreach(lc, rvs) + { + RelToCluster *rvtc = (RelToCluster *) lfirst(lc); + + /* Start a new transaction for each relation. */ + StartTransactionCommand(); + + /* functions in indexes may want a snapshot set */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* Do the job. */ + cluster_rel(rvtc->tableOid, rvtc->indexOid, + options | CLUOPT_RECHECK); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } +} diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 5238a960f7..07ef7fc1b7 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -588,6 +588,7 @@ static const SchemaQuery Query_for_list_of_clusterables = { .catname = "pg_catalog.pg_class c", .selcondition = "c.relkind IN (" CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_PARTITIONED_TABLE) ", " CppAsString2(RELKIND_MATVIEW) ")", .viscondition = "pg_catalog.pg_table_is_visible(c.oid)", .namespace = "c.relnamespace", diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 17b4e335c6..c862662a78 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3198,8 +3198,9 @@ typedef struct AlterSystemStmt */ typedef enum ClusterOption { - CLUOPT_RECHECK = 1 << 0, /* recheck relation state */ - CLUOPT_VERBOSE = 1 << 1 /* print progress info */ + CLUOPT_VERBOSE = 1 << 0, /* print progress info */ + CLUOPT_RECHECK = 1 << 1, /* recheck relation state */ + CLUOPT_RECHECK_ISCLUSTERED = 1 << 2, /* recheck relation state for indisclustered */ } ClusterOption; typedef struct ClusterStmt diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index bdae8fe00c..e4448350e7 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -439,14 +439,62 @@ select * from clstr_temp; drop table clstr_temp; RESET SESSION AUTHORIZATION; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +ERROR: cannot cluster on invalid index "clstrpart_only_idx" +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); -ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; -ERROR: cannot mark index clustered in partitioned table +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; CLUSTER clstrpart USING clstrpart_idx; -ERROR: cannot cluster a partitioned table -DROP TABLE clstrpart; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1; + relname | relkind | ?column? +-------------+---------+---------- + clstrpart | p | t + clstrpart1 | p | t + clstrpart11 | r | f + clstrpart12 | p | t + clstrpart2 | r | f + clstrpart3 | p | t + clstrpart33 | r | f +(7 rows) + +-- Check that clustering sets new indisclustered: +SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1; + indexrelid | relkind | indisclustered +-------------------+---------+---------------- + clstrpart11_a_idx | i | t + clstrpart12_a_idx | I | t + clstrpart1_a_idx | I | t + clstrpart2_a_idx | i | t + clstrpart33_a_idx | i | t + clstrpart3_a_idx | I | t + clstrpart_idx | I | t +(7 rows) + +CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned +CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs +CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) CLUSTER +Number of partitions: 3 (Use \d+ to list them.) + -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 188183647c..22225dc924 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -196,12 +196,30 @@ drop table clstr_temp; RESET SESSION AUTHORIZATION; --- Check that partitioned tables cannot be clustered +-- Check that partitioned tables can be clustered CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a); +CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a); +CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10); +CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a); +CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30); +CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a); +CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT; +ALTER TABLE clstrpart SET WITHOUT CLUSTER; +CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a); +CLUSTER clstrpart USING clstrpart_only_idx; -- fails +DROP INDEX clstrpart_only_idx; CREATE INDEX clstrpart_idx ON clstrpart (a); -ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +-- Check that clustering sets new relfilenodes: +CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; CLUSTER clstrpart USING clstrpart_idx; -DROP TABLE clstrpart; +CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ; +SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1; +-- Check that clustering sets new indisclustered: +SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1; +CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned +CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs +CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf +\d clstrpart -- Test CLUSTER with external tuplesorting -- 2.17.0
>From cbd643ad3cb19f500a2af9200234402acf2f0511 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 6 Oct 2020 20:40:18 -0500 Subject: [PATCH v3 2/3] preserve indisclustered on children of clustered, partitioned indexes Note, this takes a parentIndex, but that wasn't previously used ... UpdateIndexRelation(Oid indexoid, Oid heapoid, Oid parentIndexId, --- src/backend/catalog/index.c | 2 +- src/test/regress/expected/cluster.out | 11 +++++++++++ src/test/regress/sql/cluster.sql | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5ec225abe1..55d429971f 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -603,7 +603,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion); values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate); - values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); + values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(OidIsValid(parentIndexId) && get_index_isclustered(parentIndexId)); values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false); values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready); diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index e4448350e7..8f245da46d 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -495,6 +495,17 @@ Indexes: "clstrpart_idx" btree (a) CLUSTER Number of partitions: 3 (Use \d+ to list them.) +CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40); +\d clstrpart4 + Table "public.clstrpart4" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart FOR VALUES FROM (30) TO (40) +Indexes: + "clstrpart4_a_idx" btree (a) CLUSTER + +DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; create index cluster_sort on clstr_4 (hundred, thousand, tenthous); diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 22225dc924..b871ab53c3 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -220,6 +220,9 @@ CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitio CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf \d clstrpart +CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40); +\d clstrpart4 +DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting -- 2.17.0
>From f41adc43b9b27c7b8aec8c78297ad8042f5049e7 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 6 Oct 2020 22:11:12 -0500 Subject: [PATCH v3 3/3] Propogate changes to indisclustered to child/parents --- src/backend/commands/cluster.c | 116 +++++++++++++++++--------- src/backend/commands/indexcmds.c | 20 +++++ src/test/regress/expected/cluster.out | 43 ++++++++++ src/test/regress/sql/cluster.sql | 13 +++ 4 files changed, 154 insertions(+), 38 deletions(-) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 391e018bbd..1e36c47ec6 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -73,6 +73,7 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); +static void set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index); static List *get_tables_to_cluster(MemoryContext cluster_context); static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid); @@ -489,65 +490,104 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD index_close(OldIndex, NoLock); } +/* + * Helper for mark_index_clustered + * Mark a single index as clustered or not. + * pg_index is passed by caller to avoid repeatedly re-opening it. + */ +static void +set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index) +{ + HeapTuple indexTuple; + Form_pg_index indexForm; + + indexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(indexOid)); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexOid); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + /* this was checked earlier, but let's be real sure */ + if (isclustered && !indexForm->indisvalid) + elog(ERROR, "cannot cluster on invalid index %u", indexOid); + + indexForm->indisclustered = isclustered; + CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); + heap_freetuple(indexTuple); +} + /* * mark_index_clustered: mark the specified index as the one clustered on * - * With indexOid == InvalidOid, will mark all indexes of rel not-clustered. + * With indexOid == InvalidOid, mark all indexes of rel not-clustered. + * Otherwise, mark children of the clustered as clustered, and parents of other + * indexes as unclustered. */ void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal) { - HeapTuple indexTuple; - Form_pg_index indexForm; - Relation pg_index; - ListCell *index; + ListCell *lc, *lc2; + Relation pg_index = table_open(IndexRelationId, RowExclusiveLock); + List *inh = find_all_inheritors(RelationGetRelid(rel), + ShareRowExclusiveLock, NULL); /* - * If the index is already marked clustered, no need to do anything. + * Check each index of the relation and set/clear the bit as needed. + * Iterate over the relation's children rather than the index's chilren + * since we need to unset cluster for indexes on intermediate children, + * too. */ - if (OidIsValid(indexOid)) + + foreach(lc, inh) { - if (get_index_isclustered(indexOid)) - return; + Oid inhrelid = lfirst_oid(lc); + Relation thisrel = table_open(inhrelid, ShareRowExclusiveLock); + List *indexes = RelationGetIndexList(thisrel); + + foreach (lc2, indexes) + { + Oid thisIndexOid = lfirst_oid(lc2); + Oid partoid = index_get_partition(thisrel, indexOid); + + if (thisIndexOid == indexOid || partoid == thisIndexOid) // OidIsValid(partoid)) + { + /* A child of the clustered index should be set clustered, too */ + set_indisclustered(thisIndexOid, true, pg_index); + } + else + { + /* indexes which are not children of the clustered index are set unclustered */ + set_indisclustered(thisIndexOid, false, pg_index); + } + + InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, + InvalidOid, is_internal); + } + + list_free(indexes); + table_close(thisrel, ShareRowExclusiveLock); } + list_free(inh); + /* - * Check each index of the relation and set/clear the bit as needed. + * When setting an index partition unclustered, also remove + * indisclustered from its parents + * use this instead? get_partition_ancestors() */ - pg_index = table_open(IndexRelationId, RowExclusiveLock); - foreach(index, RelationGetIndexList(rel)) + foreach (lc, RelationGetIndexList(rel)) { - Oid thisIndexOid = lfirst_oid(index); + Oid thisIndexOid = lfirst_oid(lc); - indexTuple = SearchSysCacheCopy1(INDEXRELID, - ObjectIdGetDatum(thisIndexOid)); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", thisIndexOid); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + if (thisIndexOid == indexOid) + continue; - /* - * Unset the bit if set. We know it's wrong because we checked this - * earlier. - */ - if (indexForm->indisclustered) + while (get_rel_relispartition(thisIndexOid)) { - indexForm->indisclustered = false; - CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); + thisIndexOid = get_partition_parent(thisIndexOid); + set_indisclustered(thisIndexOid, false, pg_index); } - else if (thisIndexOid == indexOid) - { - /* this was checked earlier, but let's be real sure */ - if (!indexForm->indisvalid) - elog(ERROR, "cannot cluster on invalid index %u", indexOid); - indexForm->indisclustered = true; - CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple); - } - - InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, - InvalidOid, is_internal); - - heap_freetuple(indexTuple); } table_close(pg_index, RowExclusiveLock); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 67bd9b12dc..206eb4d656 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -26,12 +26,14 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/pg_am.h" +#include "catalog/partition.h" #include "catalog/pg_constraint.h" #include "catalog/pg_inherits.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" +#include "commands/cluster.h" #include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/defrem.h" @@ -3773,6 +3775,24 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid) /* set relispartition correctly on the partition */ update_relispartition(partRelid, OidIsValid(parentOid)); + /* if the attached index is not clustered, invalidate all parents cluster mark, if any */ + if ((OidIsValid(parentOid) && get_index_isclustered(parentOid)) || + get_index_isclustered(partRelid)) + { + Relation indrel; + + /* Make relispartition visible */ + CommandCounterIncrement(); + + indrel = table_open(IndexGetRelation(partRelid, false), + ShareUpdateExclusiveLock); + mark_index_clustered(indrel, + get_index_isclustered(partRelid) ? partRelid : InvalidOid, + true); + table_close(indrel, ShareUpdateExclusiveLock); + + } + if (fix_dependencies) { /* diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index 8f245da46d..91d0c03055 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -505,6 +505,49 @@ Partition of: clstrpart FOR VALUES FROM (30) TO (40) Indexes: "clstrpart4_a_idx" btree (a) CLUSTER +-- Check that attaching an unclustered index marks the parent unclustered: +CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES); +ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50); +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 5 (Use \d+ to list them.) + +-- Check that the parent index is marked not clustered after clustering a partition on a different index: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a); +CLUSTER clstrpart1 USING clstrpart1_idx_2; +\d clstrpart + Partitioned table "public.clstrpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: RANGE (a) +Indexes: + "clstrpart_idx" btree (a) +Number of partitions: 5 (Use \d+ to list them.) + +-- Check that only one child is marked clustered after marking clustered on a different parent +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx_2; +ERROR: index "clstrpart_idx_2" for table "clstrpart" does not exist +\d clstrpart1 + Partitioned table "public.clstrpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: clstrpart FOR VALUES FROM (1) TO (10) +Partition key: RANGE (a) +Indexes: + "clstrpart1_a_idx" btree (a) CLUSTER + "clstrpart1_idx_2" btree (a) +Number of partitions: 2 (Use \d+ to list them.) + DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting create table clstr_4 as select * from tenk1; diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index b871ab53c3..635df97257 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -222,6 +222,19 @@ CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf \d clstrpart CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40); \d clstrpart4 +-- Check that attaching an unclustered index marks the parent unclustered: +CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES); +ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50); +\d clstrpart +-- Check that the parent index is marked not clustered after clustering a partition on a different index: +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a); +CLUSTER clstrpart1 USING clstrpart1_idx_2; +\d clstrpart +-- Check that only one child is marked clustered after marking clustered on a different parent +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx; +ALTER TABLE clstrpart CLUSTER ON clstrpart_idx_2; +\d clstrpart1 DROP TABLE clstrpart; -- Test CLUSTER with external tuplesorting -- 2.17.0