Thanks for looking.

On Sun, Aug 09, 2020 at 02:00:09PM +0900, Michael Paquier wrote:
> > exactly what's needed.
> 
> For now, I would recommend to focus first on 0001 to add support for
> partitioned tables and indexes to REINDEX.  CIC is much more
> complicated btw, but I am not entering in the details now.
> 
> +   /* Avoid erroring out */
>     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
>     {
> This comment does not help, and actually this becomes incorrect as
> reindex for this relkind becomes supported once 0001 is done.

I made a minimal change to avoid forgetting to eventually change that part.

> ReindexPartitionedRel() fails to consider the concurrent case here for
> partition indexes and tables, as reindex_index()/reindex_relation()
> are the APIs used in the non-concurrent case.  Once you consider the
> concurrent case correctly, we also need to be careful with partitions
> that have a temporary persistency (note that we don't allow partition
> trees to mix persistency types, all partitions have to be temporary or
> permanent).

Fixed.

> I think that you are right to make the entry point to handle
> partitioned index in ReindexIndex() and partitioned table in
> ReindexTable(), but the structure of the patch should be different:
> - The second portion of ReindexMultipleTables() should be moved into a
> separate routine, taking in input a list of relation OIDs.  This needs
> to be extended a bit so as reindex_index() gets called for an index
> relkind if the relpersistence is temporary or if we have a
> non-concurrent reindex.  The idea is that we finish with a single code
> path able to work on a list of relations.  And your patch adds more of
> that as of ReindexPartitionedRel().

It's a good idea.

> - We should *not* handle directly partitioned index and/or table in
> ReindexRelationConcurrently() to not complicate the logic where we
> gather all the indexes of a table/matview.  So I think that the list
> of partition indexes/tables to work on should be built directly in
> ReindexIndex() and ReindexTable(), and then this should call the
> second part of ReindexMultipleTables() refactored in the previous
> point.

I think I addressed these mostly as you intended.

> This way, each partition index gets done individually in its
> own transaction.  For a partition table, all indexes of this partition
> are rebuilt in the same set of transactions.  For the concurrent case,
> we have already reindex_concurrently_swap that it able to switch the
> dependencies of two indexes within a partition tree, so we can rely on
> that so as a failure in the middle of the operation never leaves the
> a partition structure in an inconsistent state.

-- 
Justin
>From 25486d56303de512368f322c87528c2be29af0de Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 15:59:54 -0500
Subject: [PATCH v5 1/3] Implement REINDEX of partitioned tables/indexes

---
 doc/src/sgml/ref/reindex.sgml              |   5 -
 src/backend/catalog/index.c                |   8 +-
 src/backend/commands/indexcmds.c           | 137 ++++++++++++++-------
 src/backend/tcop/utility.c                 |   6 +-
 src/include/commands/defrem.h              |   6 +-
 src/test/regress/expected/create_index.out |  18 +--
 src/test/regress/sql/create_index.sql      |  13 +-
 7 files changed, 122 insertions(+), 71 deletions(-)

diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml
index aac5d5be23..e2e32b3ba0 100644
--- a/doc/src/sgml/ref/reindex.sgml
+++ b/doc/src/sgml/ref/reindex.sgml
@@ -258,11 +258,6 @@ REINDEX [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] { IN
    can always reindex anything.
   </para>
 
-  <para>
-   Reindexing partitioned tables or partitioned indexes is not supported.
-   Each individual partition can be reindexed separately instead.
-  </para>
-
   <refsect2 id="sql-reindex-concurrently" xreflabel="Rebuilding Indexes Concurrently">
    <title>Rebuilding Indexes Concurrently</title>
 
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1be27eec52..f69f027890 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3660,12 +3660,8 @@ reindex_relation(Oid relid, int flags, int options)
 	 */
 	rel = table_open(relid, ShareLock);
 
-	/*
-	 * This may be useful when implemented someday; but that day is not today.
-	 * For now, avoid erroring out when called in a multi-table context
-	 * (REINDEX SCHEMA) and happen to come across a partitioned table.  The
-	 * partitions may be reindexed on their own anyway.
-	 */
+	/* Skip partitioned tables if called in a multi-table context.  The
+	 * partitions are reindexed on their own. */
 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 	{
 		ereport(WARNING,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 2baca12c5f..68d75916ae 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -88,7 +88,10 @@ static List *ChooseIndexColumnNames(List *indexElems);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
 											Oid relId, Oid oldRelId, void *arg);
 static bool ReindexRelationConcurrently(Oid relationOid, int options);
-static void ReindexPartitionedIndex(Relation parentIdx);
+
+static List *PartitionedLeaves(Oid reloid);
+static void ReindexMultipleRelsWorker(List *relids, int options,
+		bool concurrent);
 static void update_relispartition(Oid relationId, bool newval);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 
@@ -2420,11 +2423,10 @@ ChooseIndexColumnNames(List *indexElems)
  *		Recreate a specific index.
  */
 void
-ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
+ReindexIndex(RangeVar *indexRelation, int options, bool concurrent, bool isTopLevel)
 {
 	struct ReindexIndexCallbackState state;
 	Oid			indOid;
-	Relation	irel;
 	char		persistence;
 
 	/*
@@ -2445,22 +2447,13 @@ ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
 									  RangeVarCallbackForReindexIndex,
 									  &state);
 
-	/*
-	 * Obtain the current persistence of the existing index.  We already hold
-	 * lock on the index.
-	 */
-	irel = index_open(indOid, NoLock);
-
-	if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
+	persistence = get_rel_persistence(indOid);
+	if (get_rel_relkind(indOid) == RELKIND_PARTITIONED_INDEX)
 	{
-		ReindexPartitionedIndex(irel);
-		return;
+		List *relids = PartitionedLeaves(indOid);
+		ReindexMultipleRelsWorker(relids, options, concurrent);
 	}
-
-	persistence = irel->rd_rel->relpersistence;
-	index_close(irel, NoLock);
-
-	if (concurrent && persistence != RELPERSISTENCE_TEMP)
+	else if (concurrent && persistence != RELPERSISTENCE_TEMP)
 		ReindexRelationConcurrently(indOid, options);
 	else
 		reindex_index(indOid, false, persistence,
@@ -2542,7 +2535,7 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation,
  *		Recreate all indexes of a table (and of its toast table, if any)
  */
 Oid
-ReindexTable(RangeVar *relation, int options, bool concurrent)
+ReindexTable(RangeVar *relation, int options, bool concurrent, bool isTopLevel)
 {
 	Oid			heapOid;
 	bool		result;
@@ -2560,7 +2553,12 @@ ReindexTable(RangeVar *relation, int options, bool concurrent)
 									   0,
 									   RangeVarCallbackOwnsTable, NULL);
 
-	if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
+	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
+	{
+		List *relids = PartitionedLeaves(heapOid);
+		ReindexMultipleRelsWorker(relids, options, concurrent);
+	}
+	else if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
 	{
 		result = ReindexRelationConcurrently(heapOid, options);
 
@@ -2604,7 +2602,6 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	MemoryContext private_context;
 	MemoryContext old;
 	List	   *relids = NIL;
-	ListCell   *l;
 	int			num_keys;
 	bool		concurrent_warning = false;
 
@@ -2688,11 +2685,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 		 * Only regular tables and matviews can have indexes, so ignore any
 		 * other kind of relation.
 		 *
-		 * It is tempting to also consider partitioned tables here, but that
-		 * has the problem that if the children are in the same schema, they
-		 * would be processed twice.  Maybe we could have a separate list of
-		 * partitioned tables, and expand that afterwards into relids,
-		 * ignoring any duplicates.
+		 * Partitioned tables/indexes are skipped but matching leaf
+		 * partitions are processed.
 		 */
 		if (classtuple->relkind != RELKIND_RELATION &&
 			classtuple->relkind != RELKIND_MATVIEW)
@@ -2755,22 +2749,59 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 	table_endscan(scan);
 	table_close(relationRelation, AccessShareLock);
 
-	/* Now reindex each rel in a separate transaction */
+	ReindexMultipleRelsWorker(relids, options, concurrent);
+	MemoryContextDelete(private_context);
+}
+
+/* Reindex each rel in a separate transaction */
+static void
+ReindexMultipleRelsWorker(List *relids, int options, bool concurrent)
+{
+	ListCell   *l;
+
+	/*
+	 * This cannot run inside a user transaction block; if
+	 * we were inside a transaction, then its commit- and
+	 * start-transaction-command calls would not have the
+	 * intended effect!
+	 */
+	// PreventInTransactionBlock(isTopLevel,
+		// "REINDEX of partitioned relations"); // XXX
+
 	PopActiveSnapshot();
 	CommitTransactionCommand();
+
 	foreach(l, relids)
 	{
 		Oid			relid = lfirst_oid(l);
+		char relkind;
 
 		StartTransactionCommand();
+
 		/* functions in indexes may want a snapshot set */
 		PushActiveSnapshot(GetTransactionSnapshot());
 
+		relkind = get_rel_relkind(relid);
+
+		if (relkind == RELKIND_PARTITIONED_INDEX ||
+			relkind == RELKIND_PARTITIONED_TABLE)
+		{
+			abort();
+			// ReindexPartitionedRel(relid, options, concurrent, true);
+		}
+
 		if (concurrent && get_rel_persistence(relid) != RELPERSISTENCE_TEMP)
 		{
+			/* Handles concurrent indexing of both indexes and tables */
 			(void) ReindexRelationConcurrently(relid, options);
 			/* ReindexRelationConcurrently() does the verbose output */
 		}
+		else if (relkind == RELKIND_INDEX)
+		{
+			reindex_index(relid, false, get_rel_persistence(relid),
+					options | REINDEXOPT_REPORT_PROGRESS);
+			PopActiveSnapshot();
+		}
 		else
 		{
 			bool		result;
@@ -2792,8 +2823,6 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 		CommitTransactionCommand();
 	}
 	StartTransactionCommand();
-
-	MemoryContextDelete(private_context);
 }
 
 
@@ -2805,8 +2834,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
  * view.  For tables and materialized views, all its indexes will be rebuilt,
  * excluding invalid indexes and any indexes used in exclusion constraints,
  * but including its associated toast table indexes.  For indexes, the index
- * itself will be rebuilt.  If 'relationOid' belongs to a partitioned table
- * then we issue a warning to mention these are not yet supported.
+ * itself will be rebuilt.
  *
  * The locks taken on parent tables and involved indexes are kept until the
  * transaction is committed, at which point a session lock is taken on each
@@ -3010,13 +3038,6 @@ ReindexRelationConcurrently(Oid relationOid, int options)
 				MemoryContextSwitchTo(oldcontext);
 				break;
 			}
-		case RELKIND_PARTITIONED_TABLE:
-			/* see reindex_relation() */
-			ereport(WARNING,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
-							get_rel_name(relationOid))));
-			return false;
 		default:
 			/* Return error if type of relation is not supported */
 			ereport(ERROR,
@@ -3478,17 +3499,41 @@ ReindexRelationConcurrently(Oid relationOid, int options)
 }
 
 /*
- *	ReindexPartitionedIndex
- *		Reindex each child of the given partitioned index.
- *
- * Not yet implemented.
+ *	PartitionedLeaves
+ *		Return a list of leaf partitions to be reindexed.
  */
-static void
-ReindexPartitionedIndex(Relation parentIdx)
+static List *
+PartitionedLeaves(Oid reloid)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("REINDEX is not yet implemented for partitioned indexes")));
+	MemoryContext oldcontext, reindex_context;
+	List		*inhoids;
+	ListCell	*lc;
+
+	/*
+	 * Create list of children in longlived context, since we will process
+	 * each child in a separate transaction
+	 */
+	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
+			ALLOCSET_DEFAULT_SIZES);
+	oldcontext = MemoryContextSwitchTo(reindex_context);
+	inhoids = find_all_inheritors(reloid, NoLock, NULL);
+
+	/*
+	 * We have a full list of direct and indirect children, so remove from
+	 * the list any partitioned relations (including the rel itself) and
+	 * handle the leaves directly.
+	 */
+	foreach (lc, inhoids)
+	{
+		Oid	relid = lfirst_oid(lc);
+		char	relkind = get_rel_relkind(relid);
+		if (relkind == RELKIND_PARTITIONED_INDEX ||
+			relkind == RELKIND_PARTITIONED_TABLE)
+			inhoids = foreach_delete_current(inhoids, lc);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+	return inhoids;
 }
 
 /*
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 9b0c376c8c..fd6bc65c18 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -926,10 +926,12 @@ standard_ProcessUtility(PlannedStmt *pstmt,
 				switch (stmt->kind)
 				{
 					case REINDEX_OBJECT_INDEX:
-						ReindexIndex(stmt->relation, stmt->options, stmt->concurrent);
+						ReindexIndex(stmt->relation, stmt->options,
+								stmt->concurrent, isTopLevel);
 						break;
 					case REINDEX_OBJECT_TABLE:
-						ReindexTable(stmt->relation, stmt->options, stmt->concurrent);
+						ReindexTable(stmt->relation, stmt->options,
+								stmt->concurrent, isTopLevel);
 						break;
 					case REINDEX_OBJECT_SCHEMA:
 					case REINDEX_OBJECT_SYSTEM:
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index c26a102b17..df32f5b201 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -34,8 +34,10 @@ extern ObjectAddress DefineIndex(Oid relationId,
 								 bool check_not_in_use,
 								 bool skip_build,
 								 bool quiet);
-extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent);
-extern Oid	ReindexTable(RangeVar *relation, int options, bool concurrent);
+extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent,
+						bool isTopLevel);
+extern Oid	ReindexTable(RangeVar *relation, int options, bool concurrent,
+						bool isTopLevel);
 extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 								  int options, bool concurrent);
 extern char *makeObjectName(const char *name1, const char *name2,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index e3e6634d7e..61b4a30db4 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2196,18 +2196,20 @@ SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_ind
  concur_reindex_part_index_0_2 | concur_reindex_part_index_0 |     2
 (5 rows)
 
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
 REINDEX INDEX concur_reindex_part_index_10;
-ERROR:  REINDEX is not yet implemented for partitioned indexes
 REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
-ERROR:  REINDEX is not yet implemented for partitioned indexes
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
 REINDEX TABLE concur_reindex_part_10;
-WARNING:  REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE:  table "concur_reindex_part_10" has no indexes to reindex
 REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
-WARNING:  REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE:  table "concur_reindex_part_10" has no indexes that can be reindexed concurrently
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
              relid             |         parentrelid         | level 
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f3667bacdc..a751dd5caa 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -903,12 +903,21 @@ CREATE INDEX concur_reindex_part_index_0_2 ON ONLY concur_reindex_part_0_2 (c1);
 ALTER INDEX concur_reindex_part_index_0 ATTACH PARTITION concur_reindex_part_index_0_2;
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
 REINDEX INDEX concur_reindex_part_index_10;
 REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
 REINDEX TABLE concur_reindex_part_10;
 REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
 -- REINDEX should preserve dependencies of partition tree.
-- 
2.17.0

>From a40c1bc3d63323a9417374bb7467b8c04d011912 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v5 2/3] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
 doc/src/sgml/ref/create_index.sgml     |   9 ---
 src/backend/commands/indexcmds.c       | 104 +++++++++++++++++++------
 src/test/regress/expected/indexing.out |  57 +++++++++++++-
 src/test/regress/sql/indexing.sql      |  16 +++-
 4 files changed, 148 insertions(+), 38 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 33aa64e81d..c780dc9547 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -657,15 +657,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 68d75916ae..cdf31a47d8 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -655,17 +655,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1100,6 +1089,9 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+		/* Initial concurrent build of index partition is invalid */
+		flags |= INDEX_CREATE_INVALID;
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1152,8 +1144,17 @@ DefineIndex(Oid relationId,
 		 */
 		if (!stmt->relation || stmt->relation->inh)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			PartitionDesc partdesc = RelationGetPartitionDesc(rel);
 			int			nparts = partdesc->nparts;
+			char		*relname = pstrdup(RelationGetRelationName(rel));
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
@@ -1163,8 +1164,10 @@ DefineIndex(Oid relationId,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1199,18 +1202,20 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					table_close(childrel, lockmode);
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1336,10 +1341,17 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				/* For concurrent build, this is a catalog-only stage */
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1366,23 +1378,72 @@ DefineIndex(Oid relationId,
 				table_close(pg_index, RowExclusiveLock);
 				heap_freetuple(newtup);
 			}
+		} else
+			table_close(rel, NoLock);
+
+		if (concurrent)
+		{
+			List	*childs;
+			ListCell *lc;
+			int		npart = 0;
+
+			/* Reindex invalid child indexes created earlier */
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
+			childs = find_inheritance_children(indexRelationId, NoLock);
+			MemoryContextSwitchTo(oldcontext);
+
+			/* Make the catalog changes visible to get_partition_parent */
+			CommandCounterIncrement();
+
+			foreach (lc, childs)
+			{
+				Oid			indexrelid = lfirst_oid(lc);
+				Relation	cldidx;
+				bool		isvalid;
+
+				if (!OidIsValid(parentIndexId))
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 npart++);
+
+				cldidx = index_open(indexrelid, ShareUpdateExclusiveLock);
+				isvalid = cldidx->rd_index->indisvalid;
+				index_close(cldidx, NoLock);
+				if (isvalid)
+					continue;
+
+				/* This may be a partitioned index, which is fine too */
+				ReindexRelationConcurrently(indexrelid, 0);
+				PushActiveSnapshot(GetTransactionSnapshot());
+			}
+
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+			StartTransactionCommand();
+
+			/*
+			 * CIC needs to mark a partitioned index as VALID, which itself
+			 * requires setting READY, which is unset for CIC (even though
+			 * it's meaningless for an index without storage).
+			 */
+			index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 		}
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 		return address;
 	}
 
+	table_close(rel, NoLock);
 	if (!concurrent)
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
-
 		/* If this is the top-level index, we're done. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
@@ -1390,10 +1451,9 @@ DefineIndex(Oid relationId,
 		return address;
 	}
 
-	/* save lockrelid and locktag for below, then close rel */
+	/* save lockrelid and locktag for below */
 	heaprelid = rel->rd_lockInfo.lockRelId;
 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-	table_close(rel, NoLock);
 
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f78865ef81..1ae58e110f 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,60 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 35d159f41b..d908ee6d17 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,20 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

>From 6da1017ce19902cd26b714bcdb6538dc2e128601 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v5 3/3] Implement CLUSTER of partitioned table..

This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
 src/backend/commands/cluster.c        | 139 +++++++++++++++++++-------
 src/bin/psql/tab-complete.c           |   1 +
 src/test/regress/expected/cluster.out |  23 ++++-
 src/test/regress/sql/cluster.sql      |  12 ++-
 4 files changed, 133 insertions(+), 42 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 04d12a7ece..7409c17b0b 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -33,6 +33,7 @@
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
 #include "catalog/toasting.h"
 #include "commands/cluster.h"
 #include "commands/progress.h"
@@ -72,6 +73,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+		Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
 
 
 /*---------------------------------------------------------------------------
@@ -124,14 +128,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("cannot cluster temporary tables of other sessions")));
 
-		/*
-		 * Reject clustering a partitioned table.
-		 */
-		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot cluster a partitioned table")));
-
 		if (stmt->indexname == NULL)
 		{
 			ListCell   *index;
@@ -169,8 +165,37 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		/* close relation, keep lock till commit */
 		table_close(rel, NoLock);
 
-		/* Do the job. */
-		cluster_rel(tableOid, indexOid, stmt->options);
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, stmt->options);
+		} else {
+			List	   *rvs;
+			MemoryContext cluster_context;
+
+			/* Check index directly since cluster_rel isn't called for partitioned table */
+			check_index_is_clusterable(rel, indexOid, true, AccessExclusiveLock);
+
+			/* Refuse to hold strong locks in a user transaction */
+			PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+			cluster_context = AllocSetContextCreate(PortalContext,
+												"Cluster",
+												ALLOCSET_DEFAULT_SIZES);
+
+			rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+			cluster_multiple_rels(rvs, stmt->options);
+
+			/* Start a new transaction for the cleanup work. */
+			StartTransactionCommand();
+
+			rel = table_open(tableOid, ShareUpdateExclusiveLock);
+			mark_index_clustered(rel, indexOid, true);
+			table_close(rel, NoLock);
+
+			/* Clean up working storage */
+			MemoryContextDelete(cluster_context);
+		}
 	}
 	else
 	{
@@ -180,7 +205,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		MemoryContext cluster_context;
 		List	   *rvs;
-		ListCell   *rv;
 
 		/*
 		 * We cannot run this form of CLUSTER inside a user transaction block;
@@ -204,25 +228,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		rvs = get_tables_to_cluster(cluster_context);
 
-		/* Commit to get out of starting transaction */
-		PopActiveSnapshot();
-		CommitTransactionCommand();
-
-		/* Ok, now that we've got them all, cluster them one by one */
-		foreach(rv, rvs)
-		{
-			RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-
-			/* Start a new transaction for each relation. */
-			StartTransactionCommand();
-			/* functions in indexes may want a snapshot set */
-			PushActiveSnapshot(GetTransactionSnapshot());
-			/* Do the job. */
-			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						stmt->options | CLUOPT_RECHECK);
-			PopActiveSnapshot();
-			CommitTransactionCommand();
-		}
+		cluster_multiple_rels(rvs, stmt->options);
 
 		/* Start a new transaction for the cleanup work. */
 		StartTransactionCommand();
@@ -483,12 +489,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	Relation	pg_index;
 	ListCell   *index;
 
-	/* Disallow applying to a partitioned table */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("cannot mark index clustered in partitioned table")));
-
 	/*
 	 * If the index is already marked clustered, no need to do anything.
 	 */
@@ -1557,3 +1557,70 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
 	return rvs;
 }
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List		*inhoids;
+	ListCell	*lc;
+	List		*rvs = NIL;
+
+	MemoryContext old_context = MemoryContextSwitchTo(cluster_context);
+
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+	foreach(lc, inhoids)
+	{
+		Oid		indexrelid = lfirst_oid(lc);
+		Oid		relid = IndexGetRelation(indexrelid, false);
+		RelToCluster	*rvtc;
+
+		/*
+		 * We have a full list of direct and indirect children, so skip
+		 * partitioned tables and just handle their children.
+		 */
+		if (get_rel_relkind(relid) == RELKIND_PARTITIONED_TABLE)
+			continue;
+
+		rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rvtc->tableOid = relid;
+		rvtc->indexOid = indexrelid;
+		rvs = lappend(rvs, rvtc);
+	}
+
+	MemoryContextSwitchTo(old_context);
+	return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+		ListCell *lc;
+
+		/* Commit to get out of starting transaction */
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+
+		/* Ok, now that we've got them all, cluster them one by one */
+		foreach(lc, rvs)
+		{
+			RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+
+			/* Start a new transaction for each relation. */
+			StartTransactionCommand();
+
+			/* functions in indexes may want a snapshot set */
+			PushActiveSnapshot(GetTransactionSnapshot());
+
+			/* Do the job. */
+			cluster_rel(rvtc->tableOid, rvtc->indexOid,
+						options | CLUOPT_RECHECK);
+
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+		}
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index c4af40bfa9..e68808218a 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -587,6 +587,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
 	.catname = "pg_catalog.pg_class c",
 	.selcondition =
 	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
 	CppAsString2(RELKIND_MATVIEW) ")",
 	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
 	.namespace = "c.relnamespace",
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..21b28fde6e 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,13 +439,28 @@ select * from clstr_temp;
 
 drop table clstr_temp;
 RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR:  cannot mark index clustered in partitioned table
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
 CLUSTER clstrpart USING clstrpart_idx;
-ERROR:  cannot cluster a partitioned table
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
 DROP TABLE clstrpart;
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..4a76848213 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,11 +196,19 @@ drop table clstr_temp;
 
 RESET SESSION AUTHORIZATION;
 
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
 CLUSTER clstrpart USING clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
 DROP TABLE clstrpart;
 
 -- Test CLUSTER with external tuplesorting
-- 
2.17.0

Reply via email to