Andrey M. Borodin писал(а) 2024-03-04 09:26:
On 6 Feb 2024, at 11:21, Michael Paquier <mich...@paquier.xyz> wrote:

The problem may be actually trickier than that, no?  Could there be
other factors to take into account for their classification, like
their sizes (typically, we'd want to process the biggest one first, I
guess)?

Maxim, what do you think about it?


Best regards, Andrey Borodin.

I agree that, as is the case with tables in REINDEX SCHEME, indices should be sorted accordingly to their size.

Attaching the second version of patch, with indices being sorted by size.

Best regards,
Svetlana Derevyanko
From 3c2f0fcbcf382cc2cb9786e26ad8027558937ea2 Mon Sep 17 00:00:00 2001
From: Svetlana Derevyanko <s.derevya...@postgrespro.ru>
Date: Fri, 29 Dec 2023 08:20:54 +0300
Subject: [PATCH v2] Add Index-level REINDEX with multiple jobs

Author: Maxim Orlov <orlo...@gmail.com>, Svetlana Derevyanko <s.derevya...@postgrespro.ru>
---
 doc/src/sgml/ref/reindexdb.sgml    |   3 +-
 src/bin/scripts/reindexdb.c        | 159 ++++++++++++++++++++++++++---
 src/bin/scripts/t/090_reindexdb.pl |   8 +-
 3 files changed, 151 insertions(+), 19 deletions(-)

diff --git a/doc/src/sgml/ref/reindexdb.sgml b/doc/src/sgml/ref/reindexdb.sgml
index 8d9ced212f..dfb5e534fb 100644
--- a/doc/src/sgml/ref/reindexdb.sgml
+++ b/doc/src/sgml/ref/reindexdb.sgml
@@ -187,8 +187,7 @@ PostgreSQL documentation
         setting is high enough to accommodate all connections.
        </para>
        <para>
-        Note that this option is incompatible with the <option>--index</option>
-        and <option>--system</option> options.
+        Note that this option is incompatible with the <option>--system</option> option.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c
index 6ae30dff31..63d99e7441 100644
--- a/src/bin/scripts/reindexdb.c
+++ b/src/bin/scripts/reindexdb.c
@@ -251,14 +251,6 @@ main(int argc, char *argv[])
 	}
 	else
 	{
-		/*
-		 * Index-level REINDEX is not supported with multiple jobs as we
-		 * cannot control the concurrent processing of multiple indexes
-		 * depending on the same relation.
-		 */
-		if (concurrentCons > 1 && indexes.head != NULL)
-			pg_fatal("cannot use multiple jobs to reindex indexes");
-
 		if (dbname == NULL)
 		{
 			if (getenv("PGDATABASE"))
@@ -279,7 +271,7 @@ main(int argc, char *argv[])
 		if (indexes.head != NULL)
 			reindex_one_database(&cparams, REINDEX_INDEX, &indexes,
 								 progname, echo, verbose,
-								 concurrently, 1, tablespace);
+								 concurrently, concurrentCons, tablespace);
 
 		if (tables.head != NULL)
 			reindex_one_database(&cparams, REINDEX_TABLE, &tables,
@@ -299,6 +291,67 @@ main(int argc, char *argv[])
 	exit(0);
 }
 
+static SimpleStringList *
+sort_indices_by_tables(PGconn *conn, SimpleStringList *user_list, SimpleStringList *indices_tables_list,
+					   SimpleStringList ***indices_process_list, bool echo)
+{
+	SimpleStringList *process_list = pg_malloc0(sizeof(SimpleStringList));
+	SimpleStringListCell *idx_cell,
+			   *idx_table,
+			   *cell;
+	int			ipl_len = 10,
+				current_tables_num = 0,
+				i;
+
+	*indices_process_list = pg_malloc0(sizeof(*indices_process_list) * ipl_len);
+
+	for (idx_cell = user_list->head, idx_table = indices_tables_list->head;
+		 idx_cell;
+		 idx_cell = idx_cell->next, idx_table = idx_table->next)
+	{
+		SimpleStringList *list_to_add = NULL;
+
+		if (!current_tables_num)
+		{
+			simple_string_list_append(process_list, idx_table->val);
+			(*indices_process_list)[0] = pg_malloc0(sizeof(SimpleStringList));
+			current_tables_num++;
+		}
+
+		cell = process_list->head;
+		for (i = 0; i < current_tables_num; i++)
+		{
+			if (!strcmp(idx_table->val, cell->val))
+			{
+				list_to_add = (*indices_process_list)[i];
+				break;
+			}
+
+		}
+
+		if (!list_to_add)
+		{
+			simple_string_list_append(process_list, idx_table->val);
+
+			if (current_tables_num >= ipl_len)
+			{
+				ipl_len *= 2;
+				*indices_process_list = pg_realloc(*indices_process_list,
+												   sizeof(*indices_process_list) * ipl_len);
+			}
+
+			(*indices_process_list)[current_tables_num] = pg_malloc0(sizeof(SimpleStringList));
+			list_to_add = (*indices_process_list)[current_tables_num];
+			current_tables_num++;
+		}
+
+		simple_string_list_append(list_to_add, idx_cell->val);
+
+	}
+
+	return process_list;
+}
+
 static void
 reindex_one_database(ConnParams *cparams, ReindexType type,
 					 SimpleStringList *user_list,
@@ -310,10 +363,13 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 	SimpleStringListCell *cell;
 	bool		parallel = concurrentCons > 1;
 	SimpleStringList *process_list = user_list;
+	SimpleStringList **indices_process_list = NULL;
+	SimpleStringList *indices_tables_list = NULL;
 	ReindexType process_type = type;
 	ParallelSlotArray *sa;
 	bool		failed = false;
-	int			items_count = 0;
+	int			items_count = 0,
+				i = 0;
 
 	conn = connectDatabase(cparams, progname, echo, false, true);
 
@@ -383,8 +439,23 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 					return;
 				break;
 
-			case REINDEX_SYSTEM:
 			case REINDEX_INDEX:
+				Assert(user_list != NULL);
+
+				/* Build a list of relations from the indices */
+				indices_tables_list = get_parallel_object_list(conn, process_type,
+															   user_list, echo);
+				/* Distribute indices by tables */
+				process_list = sort_indices_by_tables(conn, user_list, indices_tables_list,
+													  &indices_process_list, echo);
+				simple_string_list_destroy(indices_tables_list);
+
+				/* Bail out if nothing to process */
+				if (process_list == NULL)
+					return;
+				break;
+
+			case REINDEX_SYSTEM:
 				/* not supported */
 				Assert(false);
 				break;
@@ -440,8 +511,19 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 		}
 
 		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
-		run_reindex_command(free_slot->connection, process_type, objname,
-							echo, verbose, concurrently, true, tablespace);
+		if (parallel && process_type == REINDEX_INDEX)
+		{
+			SimpleStringListCell *idx_cell;
+
+			for (idx_cell = indices_process_list[i]->head; idx_cell; idx_cell = idx_cell->next)
+				run_reindex_command(free_slot->connection, process_type, idx_cell->val,
+									echo, verbose, concurrently, true, tablespace);
+			simple_string_list_destroy(indices_process_list[i]);
+			i++;
+		}
+		else
+			run_reindex_command(free_slot->connection, process_type, objname,
+								echo, verbose, concurrently, true, tablespace);
 
 		cell = cell->next;
 	} while (cell != NULL);
@@ -456,6 +538,9 @@ finish:
 		pg_free(process_list);
 	}
 
+	if (indices_process_list)
+		pg_free(indices_process_list);
+
 	ParallelSlotsTerminate(sa);
 	pfree(sa);
 
@@ -667,8 +752,54 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
 			}
 			break;
 
-		case REINDEX_SYSTEM:
 		case REINDEX_INDEX:
+			{
+				SimpleStringListCell *cell;
+				bool		idx_listed = false;
+
+				Assert(user_list != NULL);
+
+				/*
+				 * Straight forward index-level REINDEX is not supported with
+				 * multiple jobs as we cannot control the concurrent
+				 * processing of multiple indexes depending on the same
+				 * relation.  But we can extract appropriate table name for
+				 * the index and put REINDEX INDEX commands into different
+				 * jobs, accordingly to the parent tables.
+				 */
+				appendPQExpBufferStr(&catalog_query,
+									 "WITH idx as (\n"
+									 "  SELECT c.relname, c.relpages, ns.nspname\n"
+									 "  FROM pg_catalog.pg_class c,\n"
+									 "       pg_catalog.pg_namespace ns\n"
+									 "  WHERE c.relnamespace OPERATOR(pg_catalog.=) ns.oid AND\n"
+									 "        c.oid OPERATOR(pg_catalog.=) ANY(ARRAY['\n");
+
+				for (cell = user_list->head; cell; cell = cell->next)
+				{
+					if (idx_listed)
+						appendPQExpBufferStr(&catalog_query, "', '");
+					else
+						idx_listed = true;
+
+					appendQualifiedRelation(&catalog_query, cell->val, conn, echo);
+				}
+
+				appendPQExpBufferStr(&catalog_query,
+									 "'\n"
+									 "        ]::pg_catalog.regclass[])\n"
+									 ") \n"
+									 "SELECT pi.tablename, pi.schemaname \n"
+									 "FROM pg_catalog.pg_indexes as pi, idx\n"
+									 "WHERE pi.schemaname OPERATOR(pg_catalog.=) idx.nspname AND \n"
+									 "      pi.indexname OPERATOR(pg_catalog.=) idx.relname\n"
+									 "ORDER BY idx.relpages DESC;");
+
+				executeCommand(conn, "RESET search_path;", false);
+			}
+			break;
+
+		case REINDEX_SYSTEM:
 		case REINDEX_TABLE:
 			Assert(false);
 			break;
diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl
index 4f1a141132..cacaac4940 100644
--- a/src/bin/scripts/t/090_reindexdb.pl
+++ b/src/bin/scripts/t/090_reindexdb.pl
@@ -236,9 +236,11 @@ $node->safe_psql(
 	CREATE SCHEMA s1;
 	CREATE TABLE s1.t1(id integer);
 	CREATE INDEX ON s1.t1(id);
+	CREATE INDEX i1 ON s1.t1(id);
 	CREATE SCHEMA s2;
 	CREATE TABLE s2.t2(id integer);
 	CREATE INDEX ON s2.t2(id);
+	CREATE INDEX i2 ON s2.t2(id);
 	-- empty schema
 	CREATE SCHEMA s3;
 |);
@@ -246,9 +248,9 @@ $node->safe_psql(
 $node->command_fails(
 	[ 'reindexdb', '-j', '2', '-s', 'postgres' ],
 	'parallel reindexdb cannot process system catalogs');
-$node->command_fails(
-	[ 'reindexdb', '-j', '2', '-i', 'i1', 'postgres' ],
-	'parallel reindexdb cannot process indexes');
+$node->command_ok(
+	[ 'reindexdb', '-j', '2', '-i', 's1.i1', '-i', 's2.i2', 'postgres' ],
+	'parallel reindexdb for indices');
 # Note that the ordering of the commands is not stable, so the second
 # command for s2.t2 is not checked after.
 $node->issues_sql_like(
-- 
2.34.1

Reply via email to