On Mon, Jan 18, 2021 at 12:34:59PM -0600, Justin Pryzby wrote:
> On Sat, Nov 28, 2020 at 08:03:02PM -0600, Justin Pryzby wrote:
> > On Sun, Nov 15, 2020 at 07:53:35PM -0600, Justin Pryzby wrote:
> > > On Wed, Nov 04, 2020 at 08:23:56PM -0600, Justin Pryzby wrote:
> > > > On Tue, Oct 27, 2020 at 07:33:12PM -0500, Justin Pryzby wrote:
> > > > > I'm attaching a counter-proposal to your catalog change, which 
> > > > > preserves
> > > > > indisclustered on children of clustered, partitioned indexes, and 
> > > > > invalidates
> > > > > indisclustered when attaching unclustered indexes.
> > > > 
> > > > ..and now propagates CLUSTER ON to child indexes.
> > > > 
> > > > I left this as separate patches to show what I mean and what's new 
> > > > while we
> > > > discuss it.
> > > 
> > > This fixes some omissions in the previous patch and error in its test 
> > > cases.
> > > 
> > > CLUSTER ON recurses to children, since I think a clustered parent index 
> > > means
> > > that all its child indexes are clustered.  "SET WITHOUT CLUSTER" doesn't 
> > > have
> > > to recurse to children, but I did it like that for consistency and it 
> > > avoids
> > > the need to special case InvalidOid.
> > 
> > The previous patch failed pg_upgrade when restoring a clustered, parent 
> > index,
> > since it's marked INVALID until indexes have been built on all child 
> > tables, so
> > CLUSTER ON was rejected on invalid index.
> > 
> > So I think CLUSTER ON needs to be a separate pg_dump object, to allow 
> > attaching
> > the child index (thereby making the parent "valid") to happen before SET
> > CLUSTER on the parent index.
> 
> Rebased on b5913f612 and now a3dc92600.

This resolves ORDER BY test failure with COLLATE "C".

-- 
Justin
>From 1101d6b8a44f5cc170816f305476750352dc06de Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 26 Nov 2020 14:37:08 -0600
Subject: [PATCH v7 1/7] pg_dump: make CLUSTER ON a separate dump object..

..since it needs to be restored after any child indexes are restored *and
attached*.  The order needs to be:

1) restore child and parent index (order doesn't matter);
2) attach child index;
3) set cluster on child and parent index (order doesn't matter);
---
 src/bin/pg_dump/pg_dump.c      | 86 ++++++++++++++++++++++++++--------
 src/bin/pg_dump/pg_dump.h      |  8 ++++
 src/bin/pg_dump/pg_dump_sort.c |  8 ++++
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d99b61e621..8dc8a42964 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -208,6 +208,7 @@ static void dumpSequence(Archive *fout, TableInfo *tbinfo);
 static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo);
+static void dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo);
 static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
@@ -7092,6 +7093,11 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 				i_inddependcollversions;
 	int			ntups;
 
+	int	ncluster = 0;
+	IndexClusterInfo *clusterinfo;
+	clusterinfo = (IndexClusterInfo *)
+		pg_malloc0(numTables * sizeof(IndexClusterInfo));
+
 	for (i = 0; i < numTables; i++)
 	{
 		TableInfo  *tbinfo = &tblinfo[i];
@@ -7471,6 +7477,27 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 				/* Plain secondary index */
 				indxinfo[j].indexconstraint = 0;
 			}
+
+			/* Record each table's CLUSTERed index, if any */
+			if (indxinfo[j].indisclustered)
+			{
+				IndxInfo   *index = &indxinfo[j];
+				IndexClusterInfo *cluster = &clusterinfo[ncluster];
+
+				cluster->dobj.objType = DO_INDEX_CLUSTER_ON;
+				cluster->dobj.catId.tableoid = 0;
+				cluster->dobj.catId.oid = 0;
+				AssignDumpId(&cluster->dobj);
+				cluster->dobj.name = pg_strdup(index->dobj.name);
+				cluster->dobj.namespace = index->indextable->dobj.namespace;
+				cluster->index = index;
+				cluster->indextable = &tblinfo[i];
+
+				/* The CLUSTER ON depends on its index.. */
+				addObjectDependency(&cluster->dobj, index->dobj.dumpId);
+
+				ncluster++;
+			}
 		}
 
 		PQclear(res);
@@ -10323,6 +10350,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 		case DO_SUBSCRIPTION:
 			dumpSubscription(fout, (SubscriptionInfo *) dobj);
 			break;
+		case DO_INDEX_CLUSTER_ON:
+			dumpIndexClusterOn(fout, (IndexClusterInfo *) dobj);
+			break;
 		case DO_PRE_DATA_BOUNDARY:
 		case DO_POST_DATA_BOUNDARY:
 			/* never dumped, nothing to do */
@@ -16543,6 +16573,41 @@ getAttrName(int attrnum, TableInfo *tblInfo)
 	return NULL;				/* keep compiler quiet */
 }
 
+/*
+ * dumpIndexClusterOn
+ *	  record that the index is clustered.
+ */
+static void
+dumpIndexClusterOn(Archive *fout, IndexClusterInfo *clusterinfo)
+{
+	DumpOptions *dopt = fout->dopt;
+	TableInfo	*tbinfo = clusterinfo->indextable;
+	char		*qindxname;
+	PQExpBuffer	q;
+
+	if (dopt->dataOnly)
+		return;
+
+	q = createPQExpBuffer();
+	qindxname = pg_strdup(fmtId(clusterinfo->dobj.name));
+
+	/* index name is not qualified in this syntax */
+	appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER ON %s;\n",
+					  fmtQualifiedDumpable(tbinfo), qindxname);
+
+	if (clusterinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
+		ArchiveEntry(fout, clusterinfo->dobj.catId, clusterinfo->dobj.dumpId,
+					 ARCHIVE_OPTS(.tag = clusterinfo->dobj.name,
+								  .namespace = tbinfo->dobj.namespace->dobj.name,
+								  .owner = tbinfo->rolname,
+								  .description = "INDEX CLUSTER ON",
+								  .section = SECTION_POST_DATA,
+								  .createStmt = q->data));
+
+	destroyPQExpBuffer(q);
+	free(qindxname);
+}
+
 /*
  * dumpIndex
  *	  write out to fout a user-defined index
@@ -16597,16 +16662,6 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
 		 * similar code in dumpConstraint!
 		 */
 
-		/* If the index is clustered, we need to record that. */
-		if (indxinfo->indisclustered)
-		{
-			appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER",
-							  fmtQualifiedDumpable(tbinfo));
-			/* index name is not qualified in this syntax */
-			appendPQExpBuffer(q, " ON %s;\n",
-							  qindxname);
-		}
-
 		/*
 		 * If the index has any statistics on some of its columns, generate
 		 * the associated ALTER INDEX queries.
@@ -16933,16 +16988,6 @@ dumpConstraint(Archive *fout, ConstraintInfo *coninfo)
 		 * similar code in dumpIndex!
 		 */
 
-		/* If the index is clustered, we need to record that. */
-		if (indxinfo->indisclustered)
-		{
-			appendPQExpBuffer(q, "\nALTER TABLE %s CLUSTER",
-							  fmtQualifiedDumpable(tbinfo));
-			/* index name is not qualified in this syntax */
-			appendPQExpBuffer(q, " ON %s;\n",
-							  fmtId(indxinfo->dobj.name));
-		}
-
 		/* If the index defines identity, we need to record that. */
 		if (indxinfo->indisreplident)
 		{
@@ -18448,6 +18493,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
 				break;
 			case DO_INDEX:
 			case DO_INDEX_ATTACH:
+			case DO_INDEX_CLUSTER_ON:
 			case DO_STATSEXT:
 			case DO_REFRESH_MATVIEW:
 			case DO_TRIGGER:
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 1290f9659b..57def4c009 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -54,6 +54,7 @@ typedef enum
 	DO_ATTRDEF,
 	DO_INDEX,
 	DO_INDEX_ATTACH,
+	DO_INDEX_CLUSTER_ON,
 	DO_STATSEXT,
 	DO_RULE,
 	DO_TRIGGER,
@@ -386,6 +387,13 @@ typedef struct _indxInfo
 	DumpId		indexconstraint;
 } IndxInfo;
 
+typedef struct _indexClusterInfo
+{
+	DumpableObject dobj;
+	TableInfo  *indextable;		/* link to table the index is for */
+	IndxInfo   *index;		/* link to index itself */
+} IndexClusterInfo;
+
 typedef struct _indexAttachInfo
 {
 	DumpableObject dobj;
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 46461fb6a1..dd5b233196 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -75,6 +75,7 @@ enum dbObjectTypePriorities
 	PRIO_CONSTRAINT,
 	PRIO_INDEX,
 	PRIO_INDEX_ATTACH,
+	PRIO_INDEX_CLUSTER_ON,
 	PRIO_STATSEXT,
 	PRIO_RULE,
 	PRIO_TRIGGER,
@@ -108,6 +109,7 @@ static const int dbObjectTypePriority[] =
 	PRIO_ATTRDEF,				/* DO_ATTRDEF */
 	PRIO_INDEX,					/* DO_INDEX */
 	PRIO_INDEX_ATTACH,			/* DO_INDEX_ATTACH */
+	PRIO_INDEX_CLUSTER_ON,			/* DO_INDEX_CLUSTER_ON */
 	PRIO_STATSEXT,				/* DO_STATSEXT */
 	PRIO_RULE,					/* DO_RULE */
 	PRIO_TRIGGER,				/* DO_TRIGGER */
@@ -136,6 +138,7 @@ static const int dbObjectTypePriority[] =
 	PRIO_PUBLICATION,			/* DO_PUBLICATION */
 	PRIO_PUBLICATION_REL,		/* DO_PUBLICATION_REL */
 	PRIO_SUBSCRIPTION			/* DO_SUBSCRIPTION */
+
 };
 
 StaticAssertDecl(lengthof(dbObjectTypePriority) == (DO_SUBSCRIPTION + 1),
@@ -1348,6 +1351,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
 					 "INDEX ATTACH %s  (ID %d)",
 					 obj->name, obj->dumpId);
 			return;
+		case DO_INDEX_CLUSTER_ON:
+			snprintf(buf, bufsize,
+					 "INDEX CLUSTER ON %s  (ID %d)",
+					 obj->name, obj->dumpId);
+			return;
 		case DO_STATSEXT:
 			snprintf(buf, bufsize,
 					 "STATISTICS %s  (ID %d OID %u)",
-- 
2.17.0

>From 0004939e44906d7b32f5f6239109cc25ac72e301 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v7 2/7] Implement CLUSTER of partitioned table..

This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
 doc/src/sgml/ref/cluster.sgml         |   6 +
 src/backend/commands/cluster.c        | 174 +++++++++++++++++++-------
 src/bin/psql/tab-complete.c           |   1 +
 src/include/commands/cluster.h        |   1 +
 src/test/regress/expected/cluster.out |  58 ++++++++-
 src/test/regress/sql/cluster.sql      |  24 +++-
 6 files changed, 209 insertions(+), 55 deletions(-)

diff --git a/doc/src/sgml/ref/cluster.sgml b/doc/src/sgml/ref/cluster.sgml
index 5dd21a0189..fb5deddb35 100644
--- a/doc/src/sgml/ref/cluster.sgml
+++ b/doc/src/sgml/ref/cluster.sgml
@@ -192,6 +192,12 @@ CLUSTER [VERBOSE]
     are periodically reclustered.
    </para>
 
+   <para>
+    Clustering a partitioned table clusters each of its partitions using the
+    index partition of the given partitioned index or (if not specified) the
+    partitioned index marked as clustered.
+   </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 096a06f7b3..9b6673867c 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -32,7 +32,9 @@
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
 #include "catalog/toasting.h"
 #include "commands/cluster.h"
 #include "commands/defrem.h"
@@ -73,6 +75,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+		Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
 
 
 /*---------------------------------------------------------------------------
@@ -135,7 +140,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 											AccessExclusiveLock,
 											0,
 											RangeVarCallbackOwnsTable, NULL);
-		rel = table_open(tableOid, NoLock);
+		rel = table_open(tableOid, ShareUpdateExclusiveLock);
 
 		/*
 		 * Reject clustering a remote temp table ... their local buffer
@@ -146,14 +151,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("cannot cluster temporary tables of other sessions")));
 
-		/*
-		 * Reject clustering a partitioned table.
-		 */
-		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot cluster a partitioned table")));
-
 		if (stmt->indexname == NULL)
 		{
 			ListCell   *index;
@@ -189,10 +186,34 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		}
 
 		/* close relation, keep lock till commit */
-		table_close(rel, NoLock);
+		table_close(rel, ShareUpdateExclusiveLock);
 
-		/* Do the job. */
-		cluster_rel(tableOid, indexOid, &params);
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, &params);
+		}
+		else
+		{
+			List	   *rvs;
+			MemoryContext cluster_context;
+
+			/* Refuse to hold strong locks in a user transaction */
+			PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+			cluster_context = AllocSetContextCreate(PortalContext,
+												"Cluster",
+												ALLOCSET_DEFAULT_SIZES);
+
+			rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+			cluster_multiple_rels(rvs, params.options);
+
+			/* Start a new transaction for the cleanup work. */
+			StartTransactionCommand();
+
+			/* Clean up working storage */
+			MemoryContextDelete(cluster_context);
+		}
 	}
 	else
 	{
@@ -202,7 +223,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		 */
 		MemoryContext cluster_context;
 		List	   *rvs;
-		ListCell   *rv;
 
 		/*
 		 * We cannot run this form of CLUSTER inside a user transaction block;
@@ -225,28 +245,7 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
 		 * cluster_context.
 		 */
 		rvs = get_tables_to_cluster(cluster_context);
-
-		/* Commit to get out of starting transaction */
-		PopActiveSnapshot();
-		CommitTransactionCommand();
-
-		/* Ok, now that we've got them all, cluster them one by one */
-		foreach(rv, rvs)
-		{
-			RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-			ClusterParams cluster_params = params;
-
-			/* Start a new transaction for each relation. */
-			StartTransactionCommand();
-			/* functions in indexes may want a snapshot set */
-			PushActiveSnapshot(GetTransactionSnapshot());
-			/* Do the job. */
-			cluster_params.options |= CLUOPT_RECHECK;
-			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						&cluster_params);
-			PopActiveSnapshot();
-			CommitTransactionCommand();
-		}
+		cluster_multiple_rels(rvs, params.options | CLUOPT_RECHECK_ISCLUSTERED);
 
 		/* Start a new transaction for the cleanup work. */
 		StartTransactionCommand();
@@ -352,9 +351,10 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 			}
 
 			/*
-			 * Check that the index is still the one with indisclustered set.
+			 * Check that the index is still the one with indisclustered set, if needed.
 			 */
-			if (!get_index_isclustered(indexOid))
+			if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+					!get_index_isclustered(indexOid))
 			{
 				relation_close(OldHeap, AccessExclusiveLock);
 				pgstat_progress_end_command();
@@ -398,8 +398,13 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 
 	/* Check heap and index are valid to cluster on */
 	if (OidIsValid(indexOid))
+	{
 		check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
 
+		/* Mark the index as clustered */
+		mark_index_clustered(OldHeap, indexOid, true);
+	}
+
 	/*
 	 * Quietly ignore the request if this is a materialized view which has not
 	 * been populated from its query. No harm is done because there is no data
@@ -415,6 +420,14 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
 		return;
 	}
 
+	/* For a partitioned rel, we're done. */
+	if (!RELKIND_HAS_STORAGE(get_rel_relkind(tableOid)))
+	{
+		relation_close(OldHeap, AccessExclusiveLock);
+		pgstat_progress_end_command();
+		return;
+	}
+
 	/*
 	 * All predicate locks on the tuples or pages are about to be made
 	 * invalid, because we move tuples around.  Promote them to relation
@@ -483,6 +496,9 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	 * the worst consequence of following broken HOT chains would be that we
 	 * might put recently-dead tuples out-of-order in the new table, and there
 	 * is little harm in that.)
+	 *
+	 * This also refuses to cluster on an "incomplete" partitioned index
+	 * created with "ON ONLY".
 	 */
 	if (!OldIndex->rd_index->indisvalid)
 		ereport(ERROR,
@@ -507,12 +523,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	Relation	pg_index;
 	ListCell   *index;
 
-	/* Disallow applying to a partitioned table */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("cannot mark index clustered in partitioned table")));
-
 	/*
 	 * If the index is already marked clustered, no need to do anything.
 	 */
@@ -584,10 +594,6 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
 	TransactionId frozenXid;
 	MultiXactId cutoffMulti;
 
-	/* Mark the correct index as clustered */
-	if (OidIsValid(indexOid))
-		mark_index_clustered(OldHeap, indexOid, true);
-
 	/* Remember info about rel before closing OldHeap */
 	relpersistence = OldHeap->rd_rel->relpersistence;
 	is_system_catalog = IsSystemRelation(OldHeap);
@@ -1582,3 +1588,77 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
 	return rvs;
 }
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List		*inhoids;
+	ListCell	*lc;
+	List		*rvs = NIL;
+	MemoryContext	old_context;
+
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+
+	foreach(lc, inhoids)
+	{
+		Oid		indexrelid = lfirst_oid(lc);
+		Oid		relid = IndexGetRelation(indexrelid, false);
+		RelToCluster	*rvtc;
+
+		/*
+		 * Partitioned rels are also processed by cluster_rel, to
+		 * call check_index_is_clusterable() and mark_index_clustered().
+		 */
+
+		/*
+		 * We have to build the list in a different memory context so it will
+		 * survive the cross-transaction processing
+		 */
+		old_context = MemoryContextSwitchTo(cluster_context);
+
+		rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rvtc->tableOid = relid;
+		rvtc->indexOid = indexrelid;
+		rvs = lappend(rvs, rvtc);
+
+		MemoryContextSwitchTo(old_context);
+	}
+
+	return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+	ListCell *lc;
+
+	/* Commit to get out of starting transaction */
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+
+	/* Ok, now that we've got them all, cluster them one by one */
+	foreach(lc, rvs)
+	{
+		RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+		ClusterParams cluster_params = { .options = options, };
+
+		/* Start a new transaction for each relation. */
+		StartTransactionCommand();
+
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		/* Do the job. */
+		cluster_params.options |= CLUOPT_RECHECK;
+		cluster_rel(rvtc->tableOid, rvtc->indexOid,
+					&cluster_params);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+	}
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index a75647b1cc..781aa95abc 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -588,6 +588,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
 	.catname = "pg_catalog.pg_class c",
 	.selcondition =
 	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
 	CppAsString2(RELKIND_MATVIEW) ")",
 	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
 	.namespace = "c.relnamespace",
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index a941f2accd..c30ca01726 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -22,6 +22,7 @@
 /* flag bits for ClusterParams->flags */
 #define CLUOPT_RECHECK 0x01		/* recheck relation state */
 #define CLUOPT_VERBOSE 0x02		/* print progress info */
+#define CLUOPT_RECHECK_ISCLUSTERED 0x04	/* recheck relation state for indisclustered */
 
 /* options for CLUSTER */
 typedef struct ClusterParams
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..c74cfa88cc 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,14 +439,62 @@ select * from clstr_temp;
 
 drop table clstr_temp;
 RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+ERROR:  cannot cluster on invalid index "clstrpart_only_idx"
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR:  cannot mark index clustered in partitioned table
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-ERROR:  cannot cluster a partitioned table
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C";
+   relname   | level | relkind | ?column? 
+-------------+-------+---------+----------
+ clstrpart   |     0 | p       | t
+ clstrpart1  |     1 | p       | t
+ clstrpart11 |     2 | r       | f
+ clstrpart12 |     2 | p       | t
+ clstrpart2  |     1 | r       | f
+ clstrpart3  |     1 | p       | t
+ clstrpart33 |     2 | r       | f
+(7 rows)
+
+-- Check that clustering sets new indisclustered:
+SELECT relname, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY relname COLLATE "C";
+      relname      | relkind | indisclustered 
+-------------------+---------+----------------
+ clstrpart11_a_idx | i       | t
+ clstrpart12_a_idx | I       | t
+ clstrpart1_a_idx  | I       | t
+ clstrpart2_a_idx  | i       | t
+ clstrpart33_a_idx | i       | t
+ clstrpart3_a_idx  | I       | t
+ clstrpart_idx     | I       | t
+(7 rows)
+
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..9bcc77695c 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,12 +196,30 @@ drop table clstr_temp;
 
 RESET SESSION AUTHORIZATION;
 
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE(a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+DROP INDEX clstrpart_only_idx;
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
 CLUSTER clstrpart USING clstrpart_idx;
-DROP TABLE clstrpart;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C";
+-- Check that clustering sets new indisclustered:
+SELECT relname, relkind, indisclustered FROM pg_partition_tree('clstrpart_idx'::regclass) AS tree JOIN pg_index i ON i.indexrelid=tree.relid JOIN pg_class c ON c.oid=indexrelid ORDER BY relname COLLATE "C";
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 4995e70a192989ecda4d1b3798c0d00ba47f06c4 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 22:11:12 -0500
Subject: [PATCH v7 3/7] Propagate changes to indisclustered to child/parents

---
 src/backend/commands/cluster.c        | 109 ++++++++++++++++----------
 src/backend/commands/indexcmds.c      |   2 +
 src/test/regress/expected/cluster.out |  46 +++++++++++
 src/test/regress/sql/cluster.sql      |  11 +++
 4 files changed, 125 insertions(+), 43 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 9b6673867c..78c2c2ba72 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -74,6 +74,7 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
+static void set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
 static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
 		Oid indexOid);
@@ -510,66 +511,88 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMOD
 	index_close(OldIndex, NoLock);
 }
 
+/*
+ * Helper for mark_index_clustered
+ * Mark a single index as clustered or not.
+ * pg_index is passed by caller to avoid repeatedly re-opening it.
+ */
+static void
+set_indisclustered(Oid indexOid, bool isclustered, Relation pg_index)
+{
+	HeapTuple	indexTuple;
+	Form_pg_index indexForm;
+
+	indexTuple = SearchSysCacheCopy1(INDEXRELID,
+									ObjectIdGetDatum(indexOid));
+	if (!HeapTupleIsValid(indexTuple))
+		elog(ERROR, "cache lookup failed for index %u", indexOid);
+	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+	/* this was checked earlier, but let's be real sure */
+	if (isclustered && !indexForm->indisvalid)
+		elog(ERROR, "cannot cluster on invalid index %u", indexOid);
+
+	indexForm->indisclustered = isclustered;
+	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
+	heap_freetuple(indexTuple);
+}
+
 /*
  * mark_index_clustered: mark the specified index as the one clustered on
  *
- * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
+ * With indexOid == InvalidOid, mark all indexes of rel not-clustered.
+ * Otherwise, mark children of the clustered index as clustered, and parents of
+ * other indexes as unclustered.
+ * We wish to maintain the following properties:
+ * 1) Only one index on a relation can be marked clustered at once
+ * 2) If a partitioned index is clustered, then all its children must be
+ *     clustered.
  */
 void
 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 {
-	HeapTuple	indexTuple;
-	Form_pg_index indexForm;
-	Relation	pg_index;
-	ListCell   *index;
-
-	/*
-	 * If the index is already marked clustered, no need to do anything.
-	 */
-	if (OidIsValid(indexOid))
-	{
-		if (get_index_isclustered(indexOid))
-			return;
-	}
+	ListCell	*lc, *lc2;
+	List		*indexes;
+	Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
+	List		*inh = find_all_inheritors(RelationGetRelid(rel), ShareRowExclusiveLock, NULL);
 
 	/*
 	 * Check each index of the relation and set/clear the bit as needed.
+	 * Iterate over the relation's children rather than the index's children
+	 * since we need to unset cluster for indexes on intermediate children,
+	 * too.
 	 */
-	pg_index = table_open(IndexRelationId, RowExclusiveLock);
-
-	foreach(index, RelationGetIndexList(rel))
+	foreach(lc, inh)
 	{
-		Oid			thisIndexOid = lfirst_oid(index);
-
-		indexTuple = SearchSysCacheCopy1(INDEXRELID,
-										 ObjectIdGetDatum(thisIndexOid));
-		if (!HeapTupleIsValid(indexTuple))
-			elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
-		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+		Oid			inhrelid = lfirst_oid(lc);
+		Relation	thisrel = table_open(inhrelid, ShareRowExclusiveLock);
 
-		/*
-		 * Unset the bit if set.  We know it's wrong because we checked this
-		 * earlier.
-		 */
-		if (indexForm->indisclustered)
+		indexes = RelationGetIndexList(thisrel);
+		foreach (lc2, indexes)
 		{
-			indexForm->indisclustered = false;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
-		else if (thisIndexOid == indexOid)
-		{
-			/* this was checked earlier, but let's be real sure */
-			if (!indexForm->indisvalid)
-				elog(ERROR, "cannot cluster on invalid index %u", indexOid);
-			indexForm->indisclustered = true;
-			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
-		}
+			bool	isclustered;
+			Oid		thisIndexOid = lfirst_oid(lc2);
+			List	*parentoids = get_rel_relispartition(thisIndexOid) ?
+				get_partition_ancestors(thisIndexOid) : NIL;
 
-		InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
-									 InvalidOid, is_internal);
+			/*
+			 * A child of the clustered index must be set clustered;
+			 * indexes which are not children of the clustered index are
+			 * set unclustered
+			 */
+			isclustered = (thisIndexOid == indexOid) ||
+					list_member_oid(parentoids, indexOid);
+			Assert(OidIsValid(indexOid) || !isclustered);
+			set_indisclustered(thisIndexOid, isclustered, pg_index);
+
+			InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
+										 InvalidOid, is_internal);
+		}
 
-		heap_freetuple(indexTuple);
+		list_free(indexes);
+		table_close(thisrel, ShareRowExclusiveLock);
 	}
+	list_free(inh);
 
 	table_close(pg_index, RowExclusiveLock);
 }
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 127ba7835d..4ca1ffbfa4 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -25,6 +25,7 @@
 #include "catalog/catalog.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
@@ -32,6 +33,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
+#include "commands/cluster.h"
 #include "commands/comment.h"
 #include "commands/dbcommands.h"
 #include "commands/defrem.h"
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index c74cfa88cc..1d436dfaae 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -495,6 +495,52 @@ Indexes:
     "clstrpart_idx" btree (a) CLUSTER
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a)
+
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+            Table "public.clstrpart33"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart3 DEFAULT
+Indexes:
+    "clstrpart33_a_idx" btree (a) CLUSTER
+
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
+       Partitioned table "public.clstrpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (1) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart1_a_idx" btree (a)
+    "clstrpart1_idx_2" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 9bcc77695c..0ded2be1ca 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -220,6 +220,17 @@ CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitio
 CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
 CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
 \d clstrpart
+-- Test that it recurses to grandchildren:
+\d clstrpart33
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+\d clstrpart33
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+\d clstrpart33
+-- Check that only one child is marked clustered after marking clustered on a different parent
+CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
+\d clstrpart1
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From c8afb42bc44ab8cc6349bd53614bb6da72da7afa Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 18:58:03 -0600
Subject: [PATCH v7 4/7] Invalidate parent indexes

---
 src/backend/commands/cluster.c        | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 26 ++++++++++++++++++++++++++
 src/test/regress/sql/cluster.sql      |  8 ++++++++
 3 files changed, 55 insertions(+)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 78c2c2ba72..05c84e5582 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -594,6 +594,27 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	}
 	list_free(inh);
 
+	/*
+	 * Set parent of all indexes as unclustered when a rel is unclustered; and,
+	 * when an index is clustered, set parents of all /other/ indexes as
+	 * unclustered.
+	 */
+	indexes = RelationGetIndexList(rel);
+	foreach (lc, indexes)
+	{
+		Oid	thisIndexOid = lfirst_oid(lc);
+
+		if (thisIndexOid == indexOid)
+			continue;
+
+		while (get_rel_relispartition(thisIndexOid))
+		{
+			thisIndexOid = get_partition_parent(thisIndexOid);
+			set_indisclustered(thisIndexOid, false, pg_index);
+		}
+	}
+	list_free(indexes);
+
 	table_close(pg_index, RowExclusiveLock);
 }
 
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 1d436dfaae..6cba3cc4f9 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -541,6 +541,32 @@ Indexes:
     "clstrpart1_idx_2" btree (a) CLUSTER
 Number of partitions: 2 (Use \d+ to list them.)
 
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 0ded2be1ca..a1d132f288 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -231,6 +231,14 @@ CREATE INDEX clstrpart1_idx_2 ON clstrpart1(a);
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 CLUSTER ON clstrpart1_idx_2;
 \d clstrpart1
+-- Check that the parent index is marked not clustered after clustering a partition on a different index:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_idx_2;
+\d clstrpart
+-- Check that the parent index is marked not clustered after setting a partition not clustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From e92499f78950405b1b8f92c6240909335bdebc17 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Nov 2020 19:11:41 -0600
Subject: [PATCH v7 5/7] Invalidate parent index cluster on attach

---
 src/backend/commands/indexcmds.c      | 21 +++++++++++++++++++++
 src/test/regress/expected/cluster.out | 14 ++++++++++++++
 src/test/regress/sql/cluster.sql      |  5 +++++
 3 files changed, 40 insertions(+)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 4ca1ffbfa4..cc57c149ed 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -4117,6 +4117,27 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
 	/* set relispartition correctly on the partition */
 	update_relispartition(partRelid, OidIsValid(parentOid));
 
+	/*
+	 * If the attached index is not clustered, invalidate cluster mark on
+	 * any parents
+	 */
+	if ((OidIsValid(parentOid) && get_index_isclustered(parentOid)) ||
+			get_index_isclustered(partRelid))
+	{
+		Relation indrel;
+
+		/* Make relispartition visible */
+		CommandCounterIncrement();
+
+		indrel = table_open(IndexGetRelation(partRelid, false),
+				ShareUpdateExclusiveLock);
+		mark_index_clustered(indrel,
+				get_index_isclustered(partRelid) ? partRelid : InvalidOid,
+				true);
+		table_close(indrel, ShareUpdateExclusiveLock);
+
+	}
+
 	if (fix_dependencies)
 	{
 		/*
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 6cba3cc4f9..e7f0889743 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -567,6 +567,20 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 3 (Use \d+ to list them.)
 
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a)
+Number of partitions: 4 (Use \d+ to list them.)
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index a1d132f288..3c8085c69e 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -239,6 +239,11 @@ CLUSTER clstrpart1 USING clstrpart1_idx_2;
 ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 ALTER TABLE clstrpart1 SET WITHOUT CLUSTER;
 \d clstrpart
+-- Check that attaching an unclustered index marks the parent unclustered:
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
+ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
+\d clstrpart
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From e91b525beafbc93223807c68d557b95f7dce3e94 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 6 Oct 2020 20:40:18 -0500
Subject: [PATCH v7 6/7] Preserve indisclustered on children of clustered,
 partitioned indexes

Note, this takes a parentIndex, but that wasn't previously used ...
UpdateIndexRelation(Oid indexoid, Oid heapoid, Oid parentIndexId,
---
 src/backend/catalog/index.c           |  2 +-
 src/test/regress/expected/cluster.out | 12 ++++++++++++
 src/test/regress/sql/cluster.sql      |  4 ++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1cb9172a5f..3611da45e6 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -604,7 +604,7 @@ UpdateIndexRelation(Oid indexoid,
 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
-	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
+	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(OidIsValid(parentIndexId) && get_index_isclustered(parentIndexId));
 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index e7f0889743..a3943c13f5 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -581,6 +581,18 @@ Indexes:
     "clstrpart_idx" btree (a)
 Number of partitions: 4 (Use \d+ to list them.)
 
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
+             Table "public.clstrpart4"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition of: clstrpart FOR VALUES FROM (30) TO (40)
+Indexes:
+    "clstrpart4_a_idx" btree (a) CLUSTER
+
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
 create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 3c8085c69e..22628e90ca 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -244,6 +244,10 @@ ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
 CREATE TABLE clstrpart5 (LIKE clstrpart INCLUDING INDEXES);
 ALTER TABLE clstrpart ATTACH PARTITION clstrpart5 FOR VALUES FROM (40)TO(50);
 \d clstrpart
+-- Check that new children inherit clustered mark
+ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+CREATE TABLE clstrpart4 PARTITION OF clstrpart FOR VALUES FROM (30)TO(40);
+\d clstrpart4
 
 -- Test CLUSTER with external tuplesorting
 
-- 
2.17.0

>From 9ac8ebd70cc50132f16272df3eae7878b69d6082 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Wed, 25 Nov 2020 17:34:07 -0600
Subject: [PATCH v7 7/7] pg_dump: partitioned index depend on its partitions

This is required for restoring clustered parent index, which is marked INVALID
until indexes have been built on all its child tables, and it's prohibited to
CLUSTER ON an INVALID index

See also: 8cff4f5348d075e063100071013f00a900c32b0f
---
 src/backend/commands/tablecmds.c | 6 +++---
 src/bin/pg_dump/common.c         | 8 ++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 420991e315..a478c13990 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -17521,6 +17521,9 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
 		table_close(idxRel, RowExclusiveLock);
 	}
 
+	/* make sure we see the validation we just did */
+	CommandCounterIncrement();
+
 	/*
 	 * If this index is in turn a partition of a larger index, validating it
 	 * might cause the parent to become valid also.  Try that.
@@ -17532,9 +17535,6 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl)
 		Relation	parentIdx,
 					parentTbl;
 
-		/* make sure we see the validation we just did */
-		CommandCounterIncrement();
-
 		parentIdxId = get_partition_parent(RelationGetRelid(partedIdx));
 		parentTblId = get_partition_parent(RelationGetRelid(partedTbl));
 		parentIdx = relation_open(parentIdxId, AccessExclusiveLock);
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 1a261a5545..cdfba058fc 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -429,6 +429,12 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			attachinfo[k].parentIdx = parentidx;
 			attachinfo[k].partitionIdx = index;
 
+			/*
+			 * We want dependencies from parent to partition (so that the
+			 * partition index is created first)
+			 */
+			addObjectDependency(&parentidx->dobj, index->dobj.dumpId);
+
 			/*
 			 * We must state the DO_INDEX_ATTACH object's dependencies
 			 * explicitly, since it will not match anything in pg_depend.
@@ -446,6 +452,8 @@ flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
 			 */
 			addObjectDependency(&attachinfo[k].dobj, index->dobj.dumpId);
 			addObjectDependency(&attachinfo[k].dobj, parentidx->dobj.dumpId);
+			// addObjectDependency(&parentidx->dobj, attachinfo[k].dobj.dumpId);
+
 			addObjectDependency(&attachinfo[k].dobj,
 								index->indextable->dobj.dumpId);
 			addObjectDependency(&attachinfo[k].dobj,
-- 
2.17.0

Reply via email to