On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote:
> On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote:
> > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote:
> > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote:
> > > > I'm attaching a counter-proposal to your catalog change, which preserves
> > > > indisclustered on children of clustered, partitioned indexes, and 
> > > > invalidates
> > > > indisclustered when attaching unclustered indexes.
> > > 
> > > ..and now propagates CLUSTER ON to child indexes.
> > > 
> > > I left this as separate patches to show what I mean and what's new while 
> > > we
> > > discuss it.
> > 
> > This fixes some omissions in the previous patch and error in its test cases.
> > 
> > CLUSTER ON recurses to children, since I think a clustered parent index 
> > means
> > that all its child indexes are clustered.  "SET WITHOUT CLUSTER" doesn't 
> > have
> > to recurse to children, but I did it like that for consistency and it avoids
> > the need to special case InvalidOid.
> 
> The previous patch failed pg_upgrade when restoring a clustered, parent index,
> since it's marked INVALID until indexes have been built on all child tables, 
> so
> CLUSTER ON was rejected on invalid index.
> 
> So I think CLUSTER ON needs to be a separate pg_dump object, to allow 
> attaching
> the child index (thereby making the parent "valid") to happen before SET
> CLUSTER on the parent index.

Rebased on b5913f612 and now a3dc92600.

This patch is intertwined with the tablespace patch: not only will it get
rebase conflict, but will also need to test the functionality of
CLUSTER (TABLESPACE a) partitioned_table;

-- 
Justin
>From cb817c479d2a2d4ae94cfa8d215e369b5d206738 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 26 Nov 2020 14:37:08 -0600
Subject: [PATCH v6 1/7] pg_dump: make CLUSTER ON a separate dump object..

..since it needs to be restored after any child indexes are restored *and
attached*.  The order needs to be:

1) restore child and parent index (order doesn't matter);
2) attach child index;
3) set cluster on child and parent index (order doesn't matter);
---
 src/bin/pg_dump/pg_dump.c      | 86 ++++++++++++++++++++++++++--------
 src/bin/pg_dump/pg_dump.h      |  8 ++++
 src/bin/pg_dump/pg_dump_sort.c |  8 ++++
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 798d14580e..e6526392e5 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -208,6 +208,7 @@ static void dumpSequence(Archive *fout, TableInfo *tbinfo);
 static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo);
+static void dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo);
 static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
@@ -7092,6 +7093,11 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 				i_inddependcollversions;
 	int			ntups;
 
+	int	ncluster = 0;
+	IndexClusterInfo *clusterinfo;
+	clusterinfo = (IndexClusterInfo *)
+		pg_malloc0(numTables * sizeof(IndexClusterInfo));
+
 	for (i = 0; i < numTables; i++)
 	{
 		TableInfo  *tbinfo = &tblinfo[i];
@@ -7471,6 +7477,27 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 				/* Plain secondary index */
 				indxinfo[j].indexconstraint = 0;
 			}
+
+			/* Record each table's CLUSTERed index, if any */
+			if (indxinfo[j].indisclustered)
+			{
+				IndxInfo   *index = &indxinfo[j];
+				IndexClusterInfo *cluster = &clusterinfo[ncluster];
+
+				cluster->dobj.objType = DO_INDEX_CLUSTER_ON;
+				cluster->dobj.catId.tableoid = 0;
+				cluster->dobj.catId.oid = 0;
+				AssignDumpId(&cluster->dobj);
+				cluster->dobj.name = pg_strdup(index->dobj.name);
+				cluster->dobj.namespace = index->indextable->dobj.namespace;
+				cluster->index = index;
+				cluster->indextable = &tblinfo[i];
+
+				/* The CLUSTER ON depends on its index.. */
+				addObjectDependency(&cluster->dobj, index->dobj.dumpId);
+
+				ncluster++;
+			}
 		}
 
 		PQclear(res);
@@ -10296,6 +10323,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 		case DO_SUBSCRIPTION:
 			dumpSubscription(fout, (SubscriptionInfo *) dobj);
 			break;
+		case DO_INDEX_CLUSTER_ON:
+			dumpIndexClusterOn(fout, (IndexClusterInfo *) dobj);
+			break;
 		case DO_PRE_DATA_BOUNDARY:
 		case DO_POST_DATA_BOUNDARY:
 			/* never dumped, nothing to do */
@@ -16516,6 +16546,41 @@ getAttrName(int attrnum, TableInfo *tblInfo)
 	return NULL;				/* keep compiler quiet */
 }
 
+/*
+ * dumpIndexClusterOn
+ *	  record that the index is clustered.
+ */
+static void
+dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo)
+{
+	DumpOptions *dopt = fout->dopt;
+	TableInfo	*tbinfo = clusterinfo->indextable;
+	char		*qindxname;
+	PQExpBuffer	q;
+
+	if (dopt->dataOnly)
+		return;
+
+	q = createPQExpBuffer();
+	qindxname = pg_strdup(fmtId(clusterinfo->dobj.name));
+
+	/* index name is not qualified in this syntax */
+	appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER ON %s;\n",
+					  fmtQualifiedDumpable(tbinfo), qindxname);
+
+	if (clusterinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
+		ArchiveEntry(fout, clusterinfo->dobj.catId, clusterinfo->dobj.dumpId,
+					 ARCHIVE_OPTS(.tag = clusterinfo->dobj.name,
+								  .namespace = tbinfo->dobj.namespace->dobj.name,
+								  .owner = tbinfo->rolname,
+								  .description = "INDEX CLUSTER ON",
+								  .section = SECTION_POST_DATA,
+								  .createStmt = q->data));
+
+	destroyPQExpBuffer(q);
+	free(qindxname);
+}
+
 /*
  * dumpIndex
  *	  write out to fout a user-defined index
@@ -16570,16 +16635,6 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
 		 * similar code in dumpConstraint!
 		 */
 
-		/* If the index is clustered, we need to record that. */
-		if (indxinfo->indisclustered)
-		{
-			appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER",
-							  fmtQualifiedDumpable(tbinfo));
-			/* index name is not qualified in this syntax */
-			appendPQExpBuffer(q, " ON %s;\n",
-							  qindxname);
-		}
-
 		/*
 		 * If the index has any statistics on some of its columns, generate
 		 * the associated ALTER INDEX queries.
@@ -16906,16 +16961,6 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo)
 		 * similar code in dumpIndex!
 		 */
 
-		/* If the index is clustered, we need to record that. */
-		if (indxinfo->indisclustered)
-		{
-			appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER",
-							  fmtQualifiedDumpable(tbinfo));
-			/* index name is not qualified in this syntax */
-			appendPQExpBuffer(q, " ON %s;\n",
-							  fmtId(indxinfo->dobj.name));
-		}
-
 		/* If the index defines identity, we need to record that. */
 		if (indxinfo->indisreplident)
 		{
@@ -18421,6 +18466,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
 				break;
 			case DO_INDEX:
 			case DO_INDEX_ATTACH:
+			case DO_INDEX_CLUSTER_ON:
 			case DO_STATSEXT:
 			case DO_REFRESH_MATVIEW:
 			case DO_TRIGGER:
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 1290f9659b..57def4c009 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -54,6 +54,7 @@ typedef enum
 	DO_ATTRDEF,
 	DO_INDEX,
 	DO_INDEX_ATTACH,
+	DO_INDEX_CLUSTER_ON,
 	DO_STATSEXT,
 	DO_RULE,
 	DO_TRIGGER,
@@ -386,6 +387,13 @@ typedef struct _indxInfo
 	DumpId		indexconstraint;
 } IndxInfo;
 
+typedef struct _indexClusterInfo
+{
+	DumpableObject dobj;
+	TableInfo  *indextable;		/* link to table the index is for */
+	IndxInfo   *index;		/* link to index itself */
+} IndexClusterInfo;
+
 typedef struct _indexAttachInfo
 {
 	DumpableObject dobj;
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 46461fb6a1..dd5b233196 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -75,6 +75,7 @@ enum dbObjectTypePriorities
 	PRIO_CONSTRAINT,
 	PRIO_INDEX,
 	PRIO_INDEX_ATTACH,
+	PRIO_INDEX_CLUSTER_ON,
 	PRIO_STATSEXT,
 	PRIO_RULE,
 	PRIO_TRIGGER,
@@ -108,6 +109,7 @@ static const int dbObjectTypePriority[] =
 	PRIO_ATTRDEF,				/* DO_ATTRDEF */
 	PRIO_INDEX,					/* DO_INDEX */
 	PRIO_INDEX_ATTACH,			/* DO_INDEX_ATTACH */
+	PRIO_INDEX_CLUSTER_ON,			/* DO_INDEX_CLUSTER_ON */
 	PRIO_STATSEXT,				/* DO_STATSEXT */
 	PRIO_RULE,					/* DO_RULE */
 	PRIO_TRIGGER,				/* DO_TRIGGER */
@@ -136,6 +138,7 @@ static const int dbObjectTypePriority[] =
 	PRIO_PUBLICATION,			/* DO_PUBLICATION */
 	PRIO_PUBLICATION_REL,		/* DO_PUBLICATION_REL */
 	PRIO_SUBSCRIPTION			/* DO_SUBSCRIPTION */
+
 };
 
 StaticAssertDecl(lengthof(dbObjectTypePriority) == (DO_SUBSCRIPTION + 1),
@@ -1348,6 +1351,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
 					 "INDEX ATTACH %s  (ID %d)",
 					 obj->name, obj->dumpId);
 			return;
+		case DO_INDEX_CLUSTER_ON:
+			snprintf(buf, bufsize,
+					 "INDEX CLUSTER ON %s  (ID %d)",
+					 obj->name, obj->dumpId);
+			return;
 		case DO_STATSEXT:
 			snprintf(buf, bufsize,
 					 "STATISTICS %s  (ID %d OID %u)",
-- 
2.17.0

>From f5b22edbfbf80e057a5795ab00e92ce41ad6168c Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v6 2/7] Implement CLUSTER of partitioned table..

This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
 doc/src/sgml/ref/cluster.sgml         |   6 +
 src/backend/commands/cluster.c        | 172 +++++++++++++++++++-------
 src/bin/psql/tab-complete.c           |   1 +
 src/include/commands/cluster.h        |   1 +
 src/test/regress/expected/cluster.out |  58 ++++++++-
 src/test/regress/sql/cluster.sql      |  24 +++-
 6 files changed, 208 insertions(+), 54 deletions(-)

diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml
index 5dd21a0189..fb5deddb35 100644
--- a/doc/src/sgml/ref/cluster.sgml
+++ b/doc/src/sgml/ref/cluster.sgml
@@ -192,6 +192,12 @@ CLUSTER [VERBOSE]
     are periodically reclustered.
    </para>
 
+   <para>
+    Clustering a partitioned table clusters each of its partitions using the
+    index partition of the given partitioned index or (if not specified) the
+    partitioned index marked as clustered.
+   </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 096a06f7b3..838bcd9e72 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -32,7 +32,9 @@
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
 #include "catalog/toasting.h"
 #include "commands/cluster.h"
 #include "commands/defrem.h"
@@ -73,6 +75,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+		Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
 
 
 /*---------------------------------------------------------------------------
@@ -135,7 +140,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 											AccessExclusiveLock,
 											0,
 											RangeVarCallbackOwnsTable, NULL);
-		rel = table_open(tableOid, NoLock);
+		rel = table_open(tableOid, ShareUpdateExclusiveLock);
 
 		/*
 		 * Reject clustering a remote temp table ... their local buffer
@@ -146,14 +151,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("cannot cluster temporary tables of other sessions")));
 
-		/*
-		 * Reject clustering a partitioned table.
-		 */
-		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot cluster a partitioned table")));
-
 		if (stmt->indexname == NULL)
 		{
 			ListCell   *index;
@@ -191,8 +188,32 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		/* close relation, keep lock till commit */
 		table_close(rel, NoLock);
 
-		/* Do the job. */
-		cluster_rel(tableOid, indexOid, &params);
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, &params);
+		}
+		else
+		{
+			List	   *rvs;
+			MemoryContext cluster_context;
+
+			/* Refuse to hold strong locks in a user transaction */
+			PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+			cluster_context = AllocSetContextCreate(PortalContext,
+												"Cluster",
+												ALLOCSET_DEFAULT_SIZES);
+
+			rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+			cluster_multiple_rels(rvs, params.options);
+
+			/* Start a new transaction for the cleanup work. */
+			StartTransactionCommand();
+
+			/* Clean up working storage */
+			MemoryContextDelete(cluster_context);
+		}
 	}
 	else
 	{
@@ -202,7 +223,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		 */
 		MemoryContext cluster_context;
 		List	   *rvs;
-		ListCell   *rv;
 
 		/*
 		 * We cannot run this form of CLUSTER inside a user transaction block;
@@ -225,28 +245,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		 * cluster_context.
 		 */
 		rvs = get_tables_to_cluster(cluster_context);
-
-		/* Commit to get out of starting transaction */
-		PopActiveSnapshot();
-		CommitTransactionCommand();
-
-		/* Ok, now that we've got them all, cluster them one by one */
-		foreach(rv, rvs)
-		{
-			RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-			ClusterParams cluster_params = params;
-
-			/* Start a new transaction for each relation. */
-			StartTransactionCommand();
-			/* functions in indexes may want a snapshot set */
-			PushActiveSnapshot(GetTransactionSnapshot());
-			/* Do the job. */
-			cluster_params.options |= CLUOPT_RECHECK;
-			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						&cluster_params);
-			PopActiveSnapshot();
-			CommitTransactionCommand();
-		}
+		cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED);
 
 		/* Start a new transaction for the cleanup work. */
 		StartTransactionCommand();
@@ -352,9 +351,10 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 			}
 
 			/*
-			 * Check that the index is still the one with indisclustered set.
+			 * Check that the index is still the one with indisclustered set, if needed.
 			 */
-			if (!get_index_isclustered(indexOid))
+			if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+					!get_index_isclustered(indexOid))
 			{
 				relation_close(OldHeap, AccessExclusiveLock);
 				pgstat_progress_end_command();
@@ -398,8 +398,13 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 
 	/* Check heap and index are valid to cluster on */
 	if (OidIsValid(indexOid))
+	{
 		check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
 
+		/* Mark the index as clustered */
+		mark_index_clustered(OldHeap, indexOid, true);
+	}
+
 	/*
 	 * Quietly ignore the request if this is a materialized view which has not
 	 * been populated from its query. No harm is done because there is no data
@@ -415,6 +420,14 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 		return;
 	}
 
+	/* For a partitioned rel, we're done. */
+	if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid)))
+	{
+		relation_close(OldHeap, AccessExclusiveLock);
+		pgstat_progress_end_command();
+		return;
+	}
+
 	/*
 	 * All predicate locks on the tuples or pages are about to be made
 	 * invalid, because we move tuples around.  Promote them to relation
@@ -483,6 +496,9 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	 * the worst consequence of following broken HOT chains would be that we
 	 * might put recently-dead tuples out-of-order in the new table, and there
 	 * is little harm in that.)
+	 *
+	 * This also refuses to cluster on an "incomplete" partitioned index
+	 * created with "ON ONLY".
 	 */
 	if (!OldIndex->rd_index->indisvalid)
 		ereport(ERROR,
@@ -507,12 +523,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	Relation	pg_index;
 	ListCell   *index;
 
-	/* Disallow applying to a partitioned table */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("cannot mark index clustered in partitioned table")));
-
 	/*
 	 * If the index is already marked clustered, no need to do anything.
 	 */
@@ -584,10 +594,6 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
 	TransactionId frozenXid;
 	MultiXactId cutoffMulti;
 
-	/* Mark the correct index as clustered */
-	if (OidIsValid(indexOid))
-		mark_index_clustered(OldHeap, indexOid, true);
-
 	/* Remember info about rel before closing OldHeap */
 	relpersistence = OldHeap->rd_rel->relpersistence;
 	is_system_catalog = IsSystemRelation(OldHeap);
@@ -1582,3 +1588,77 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
 	return rvs;
 }
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List		*inhoids;
+	ListCell	*lc;
+	List		*rvs = NIL;
+	MemoryContext	old_context;
+
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+
+	foreach(lc, inhoids)
+	{
+		Oid		indexrelid = lfirst_oid(lc);
+		Oid		relid = IndexGetRelation(indexrelid, false);
+		RelToCluster	*rvtc;
+
+		/*
+		 * Partitioned rels are also processed by cluster_rel, to
+		 * call check_index_is_clusterable() and mark_index_clustered().
+		 */
+
+		/*
+		 * We have to build the list in a different memory context so it will
+		 * survive the cross-transaction processing
+		 */
+		old_context = MemoryContextSwitchTo(cluster_context);
+
+		rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rvtc->tableOid = relid;
+		rvtc->indexOid = indexrelid;
+		rvs = lappend(rvs, rvtc);
+
+		MemoryContextSwitchTo(old_context);
+	}
+
+	return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+	ListCell *lc;
+
+	/* Commit to get out of starting transaction */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Ok, now that we've got them all, cluster them one by one */
+	foreach(lc, rvs)
+	{
+		RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+		ClusterParams cluster_params = { .options = options, };
+
+		/* Start a new transaction for each relation. */
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* Do the job. */
+		cluster_params.options |= CLUOPT_RECHECK;
+		cluster_rel(rvtc->tableOid, rvtc->indexOid,
+					&cluster_params);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6abcbea963..3a6a4831e1 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -588,6 +588,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
 	.catname = "pg_catalog.pg_class c",
 	.selcondition =
 	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
 	CppAsString2(RELKIND_MATVIEW) ")",
 	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
 	.namespace = "c.relnamespace",
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index a941f2accd..c30ca01726 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -22,6 +22,7 @@
 /* flag bits for ClusterParams->flags */
 #define CLUOPT_RECHECK 0x01		/* recheck relation state */
 #define CLUOPT_VERBOSE 0x02		/* print progress info */
+#define CLUOPT_RECHECK_ISCLUSTERED 0x04	/* recheck relation state for indisclustered */
 
 /* options for CLUSTER */
 typedef struct ClusterParams
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..e4448350e7 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,14 +439,62 @@ select * from clstr_temp;
 
 drop table clstr_temp;
 RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+ERROR:  cannot cluster on invalid index "clstrpart_only_idx"
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR:  cannot mark index clustered in partitioned table
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-ERROR:  cannot cluster a partitioned table
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1;
+   relname   | relkind | ?column? 
+-------------+---------+----------
+ clstrpart   | p       | t
+ clstrpart1  | p       | t
+ clstrpart11 | r       | f
+ clstrpart12 | p       | t
+ clstrpart2  | r       | f
+ clstrpart3  | p       | t
+ clstrpart33 | r       | f
+(7 rows)
+
+-- Check that clustering sets new indisclustered:
+SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1;
+    indexrelid     | relkind | indisclustered 
+-------------------+---------+----------------
+ clstrpart11_a_idx | i       | t
+ clstrpart12_a_idx | I       | t
+ clstrpart1_a_idx  | I       | t
+ clstrpart2_a_idx  | i       | t
+ clstrpart33_a_idx | i       | t
+ clstrpart3_a_idx  | I       | t
+ clstrpart_idx     | I       | t
+(7 rows)
+
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..22225dc924 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,12 +196,30 @@ drop table clstr_temp;
 
 RESET SESSION AUTHORIZATION;
 
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY 1;
+-- Check that clustering sets new indisclustered:
+SELECT i.indexrelid::regclass::text, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY 1;
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From d99d6b22da07f1c575403411ee8d042e8bae34f9 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 22:11:12 -0500
Subject: [PATCH v6 3/7] Propagate changes to indisclustered to child/parents

---
 src/backend/commands/cluster.c        | 109 ++++++++++++++++----------
 src/backend/commands/indexcmds.c      |   2 +
 src/test/regress/expected/cluster.out |  46 +++++++++++
 src/test/regress/sql/cluster.sql      |  11 +++
 4 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 838bcd9e72..078b97fbb9 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -74,6 +74,7 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
+static void set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
 static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
 		Oid indexOid);
@@ -510,66 +511,88 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	index_close(OldIndex, NoLock);
 }
 
+/*
+ * Helper for mark_index_clustered
+ * Mark a single index as clustered or not.
+ * pg_index is passed by caller to avoid repeatedly re-opening it.
+ */
+static void
+set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index)
+{
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+
+	indexTuple = SearchSysCacheCopy1(INDEXRELID,
+									ObjectIdGetDatum(indexOid));
+	if (!HeapTupleIsValid(indexTuple))
+		elog(ERROR, "cache lookup failed for index %u", indexOid);
+	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+	/* this was checked earlier, but let's be real sure */
+	if (isclustered && !indexForm->indisvalid)
+		elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+
+	indexForm->indisclustered = isclustered;
+	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+	heap_freetuple(indexTuple);
+}
+
 /*
  * mark_index_clustered: mark the specified index as the one clustered on
  *
- * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ * With indexOid == InvalidOid, mark all indexes of rel not-clustered.
+ * Otherwise, mark children of the clustered index as clustered, and parents of
+ * other indexes as unclustered.
+ * We wish to maintain the following properties:
+ * 1) Only one index on a relation can be marked clustered at once
+ * 2) If a partitioned index is clustered, then all its children must be
+ *     clustered.
  */
 void
 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 {
-	HeapTuple	indexTuple;
-	Form_pg_index indexForm;
-	Relation	pg_index;
-	ListCell   *index;
-
-	/*
-	 * If the index is already marked clustered, no need to do anything.
-	 */
-	if (OidIsValid(indexOid))
-	{
-		if (get_index_isclustered(indexOid))
-			return;
-	}
+	ListCell	*lc, *lc2;
+	List		*indexes;
+	Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+	List		*inh = find_all_inheritors(RelationGetRelid(rel), ShareRowExclusiveLock, NULL);
 
 	/*
 	 * Check each index of the relation and set/clear the bit as needed.
+	 * Iterate over the relation's children rather than the index's children
+	 * since we need to unset cluster for indexes on intermediate children,
+	 * too.
 	 */
-	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-
-	foreach(index, RelationGetIndexList(rel))
+	foreach(lc, inh)
 	{
-		Oid			thisIndexOid = lfirst_oid(index);
-
-		indexTuple = SearchSysCacheCopy1(INDEXRELID,
-										 ObjectIdGetDatum(thisIndexOid));
-		if (!HeapTupleIsValid(indexTuple))
-			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
-		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+		Oid			inhrelid = lfirst_oid(lc);
+		Relation	thisrel = table_open(inhrelid, ShareRowExclusiveLock);
 
-		/*
-		 * Unset the bit if set.  We know it's wrong because we checked this
-		 * earlier.
-		 */
-		if (indexForm->indisclustered)
+		indexes = RelationGetIndexList(thisrel);
+		foreach (lc2, indexes)
 		{
-			indexForm->indisclustered = false;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
-		else if (thisIndexOid == indexOid)
-		{
-			/* this was checked earlier, but let's be real sure */
-			if (!indexForm->indisvalid)
-				elog(ERROR, "cannot cluster on invalid index %u", indexOid);
-			indexForm->indisclustered = true;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
+			bool	isclustered;
+			Oid		thisIndexOid = lfirst_oid(lc2);
+			List	*parentoids = get_rel_relispartition(thisIndexOid) ?
+				get_partition_ancestors(thisIndexOid) : NIL;
 
-		InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
-									 InvalidOid, is_internal);
+			/*
+			 * A child of the clustered index must be set clustered;
+			 * indexes which are not children of the clustered index are
+			 * set unclustered
+			 */
+			isclustered = (thisIndexOid == indexOid) ||
+					list_member_oid(parentoids, indexOid);
+			Assert(OidIsValid(indexOid) || !isclustered);
+			set_indisclustered(thisIndexOid, isclustered, pg_index);
+
+			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+										 InvalidOid, is_internal);
+		}
 
-		heap_freetuple(indexTuple);
+		list_free(indexes);
+		table_close(thisrel, ShareRowExclusiveLock);
 	}
+	list_free(inh);
 
 	table_close(pg_index, RowExclusiveLock);
 }
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index f9f3ff3b62..0a65698c28 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -25,6 +25,7 @@
 #include "catalog/catalog.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
@@ -32,6 +33,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
+#include "commands/cluster.h"
 #include "commands/comment.h"
 #include "commands/dbcommands.h"
 #include "commands/defrem.h"
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index e4448350e7..a9fb9f1021 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -495,6 +495,52 @@ Indexes:
     "clstrpart_idx" btree (a) CLUSTER
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a)
+
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
+       Partitioned table "public.clstrpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (1) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart1_a_idx" btree (a)
+    "clstrpart1_idx_2" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 22225dc924..d15bd51496 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -220,6 +220,17 @@ CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitio
 CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
 CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
 \d clstrpart
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 690be02168bce28b8f110946d4f04f449639f2a2 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 18:58:03 -0600
Subject: [PATCH v6 4/7] Invalidate parent indexes

---
 src/backend/commands/cluster.c        | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 26 ++++++++++++++++++++++++++
 src/test/regress/sql/cluster.sql      |  8 ++++++++
 3 files changed, 55 insertions(+)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 078b97fbb9..cdb49985ce 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -594,6 +594,27 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	}
 	list_free(inh);
 
+	/*
+	 * Set parent of all indexes as unclustered when a rel is unclustered; and,
+	 * when an index is clustered, set parents of all /other/ indexes as
+	 * unclustered.
+	 */
+	indexes = RelationGetIndexList(rel);
+	foreach (lc, indexes)
+	{
+		Oid	thisIndexOid = lfirst_oid(lc);
+
+		if (thisIndexOid == indexOid)
+			continue;
+
+		while (get_rel_relispartition(thisIndexOid))
+		{
+			thisIndexOid = get_partition_parent(thisIndexOid);
+			set_indisclustered(thisIndexOid, false, pg_index);
+		}
+	}
+	list_free(indexes);
+
 	table_close(pg_index, RowExclusiveLock);
 }
 
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index a9fb9f1021..6d88978387 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -541,6 +541,32 @@ Indexes:
     "clstrpart1_idx_2" btree (a) CLUSTER
 Number of partitions: 2 (Use \d+ to list them.)
 
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index d15bd51496..c9bb204a93 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -231,6 +231,14 @@ CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
 \d clstrpart1
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 811dbf5135da57cd0ee14f213f566b092a264c3a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 19:11:41 -0600
Subject: [PATCH v6 5/7] Invalidate parent index cluster on attach

---
 src/backend/commands/indexcmds.c      | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 14 ++++++++++++++
 src/test/regress/sql/cluster.sql      |  5 +++++
 3 files changed, 40 insertions(+)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 0a65698c28..ffc809822b 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -4006,6 +4006,27 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	/* set relispartition correctly on the partition */
 	update_relispartition(partRelid, OidIsValid(parentOid));
 
+	/*
+	 * If the attached index is not clustered, invalidate cluster mark on
+	 * any parents
+	 */
+	if ((OidIsValid(parentOid) && get_index_isclustered(parentOid)) ||
+			get_index_isclustered(partRelid))
+	{
+		Relation indrel;
+
+		/* Make relispartition visible */
+		CommandCounterIncrement();
+
+		indrel = table_open(IndexGetRelation(partRelid, false),
+				ShareUpdateExclusiveLock);
+		mark_index_clustered(indrel,
+				get_index_isclustered(partRelid) ? partRelid : InvalidOid,
+				true);
+		table_close(indrel, ShareUpdateExclusiveLock);
+
+	}
+
 	if (fix_dependencies)
 	{
 		/*
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 6d88978387..f0c962db75 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -567,6 +567,20 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 4 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index c9bb204a93..ff7ffed6e4 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -239,6 +239,11 @@ CLUSTER clstrpart1 USING clstrpart1_idx_2;
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
 \d clstrpart
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 50edd08cf9893e2d29c849825b812a6706d4bc61 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 20:40:18 -0500
Subject: [PATCH v6 6/7] Preserve indisclustered on children of clustered,
 partitioned indexes

Note, this takes a parentIndex, but that wasn't previously used ...
UpdateIndexRelation(Oid indexoid, Oid heapoid, Oid parentIndexId,
---
 src/backend/catalog/index.c           |  2 +-
 src/test/regress/expected/cluster.out | 12 ++++++++++++
 src/test/regress/sql/cluster.sql      |  4 ++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index b8cd35e995..e15fb42c9c 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -603,7 +603,7 @@ UpdateIndexRelation(Oid indexoid,
 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
-	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
+	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(OidIsValid(parentIndexId) && get_index_isclustered(parentIndexId));
 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index f0c962db75..fc642f87d2 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -581,6 +581,18 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 4 (Use \d+ to list them.)
 
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
+             Table "public.clstrpart4"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (30) TO (40)
+Indexes:
+    "clstrpart4_a_idx" btree (a) CLUSTER
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index ff7ffed6e4..5df338f60d 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -244,6 +244,10 @@ ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
 ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
 \d clstrpart
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From b55a919ef90e82f5233b2a8f789c129b264615cb Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Wed, 25 Nov 2020 17:34:07 -0600
Subject: [PATCH v6 7/7] pg_dump: partitioned index depend on its partitions

This is required for restoring clustered parent index, which is marked INVALID
until indexes have been built on all its child tables, and it's prohibited to
CLUSTER ON an INVALID index

See also: 8cff4f5348d075e063100071013f00a900c32b0f
---
 src/backend/commands/tablecmds.c | 6 +++---
 src/bin/pg_dump/common.c         | 8 ++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 8687e9a97c..865ab6c2e9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -17493,6 +17493,9 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
 		table_close(idxRel, RowExclusiveLock);
 	}
 
+	/* make sure we see the validation we just did */
+	CommandCounterIncrement();
+
 	/*
 	 * If this index is in turn a partition of a larger index, validating it
 	 * might cause the parent to become valid also.  Try that.
@@ -17504,9 +17507,6 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
 		Relation	parentIdx,
 					parentTbl;
 
-		/* make sure we see the validation we just did */
-		CommandCounterIncrement();
-
 		parentIdxId = get_partition_parent(RelationGetRelid(partedIdx));
 		parentTblId = get_partition_parent(RelationGetRelid(partedTbl));
 		parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index b0f02bc1f6..2a7d9b463c 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -429,6 +429,12 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			attachinfo[k].parentIdx = parentidx;
 			attachinfo[k].partitionIdx = index;
 
+			/*
+			 * We want dependencies from parent to partition (so that the
+			 * partition index is created first)
+			 */
+			addObjectDependency(&parentidx->dobj, index->dobj.dumpId);
+
 			/*
 			 * We must state the DO_INDEX_ATTACH object's dependencies
 			 * explicitly, since it will not match anything in pg_depend.
@@ -446,6 +452,8 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			 */
 			addObjectDependency(&attachinfo[k].dobj, index->dobj.dumpId);
 			addObjectDependency(&attachinfo[k].dobj, parentidx->dobj.dumpId);
+			// addObjectDependency(&parentidx->dobj, attachinfo[k].dobj.dumpId);
+
 			addObjectDependency(&attachinfo[k].dobj,
 								index->indextable->dobj.dumpId);
 			addObjectDependency(&attachinfo[k].dobj,
-- 
2.17.0

Reply via email to