On 2025-Mar-07, Álvaro Herrera wrote: > Anyway, my version of this is attached. It fixes the problems with your > patch, but not Orlov's fundamental bug.
And of course I forgot to actually attach the patch. Good grief. -- Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/ "No deja de ser humillante para una persona de ingenio saber que no hay tonto que no le pueda enseñar algo." (Jean B. Say)
diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c index b00c8112869..c55742daf81 100644 --- a/src/bin/scripts/reindexdb.c +++ b/src/bin/scripts/reindexdb.c @@ -12,6 +12,7 @@ #include "postgres_fe.h" #include <limits.h> +#include <stdlib.h> #include "catalog/pg_class_d.h" #include "common.h" @@ -33,10 +34,14 @@ typedef enum ReindexType } ReindexType; -static SimpleStringList *get_parallel_object_list(PGconn *conn, +static SimpleStringList *get_parallel_tables_list(PGconn *conn, ReindexType type, SimpleStringList *user_list, bool echo); +static void get_parallel_tabidx_list(PGconn *conn, + SimpleStringList *index_list, + SimpleOidList **table_list, + bool echo); static void reindex_one_database(ConnParams *cparams, ReindexType type, SimpleStringList *user_list, const char *progname, @@ -276,15 +281,15 @@ reindex_one_database(ConnParams *cparams, ReindexType type, { PGconn *conn; SimpleStringListCell *cell; - SimpleStringListCell *indices_tables_cell = NULL; + SimpleOidListCell *indices_tables_cell = NULL; bool parallel = concurrentCons > 1; - SimpleStringList *process_list = user_list; - SimpleStringList *indices_tables_list = NULL; + SimpleStringList *process_list; + SimpleOidList *tableoid_list = NULL; ReindexType process_type = type; ParallelSlotArray *sa; bool failed = false; - int items_count = 0; - char *prev_index_table_name = NULL; + int items_count; + Oid prev_index_table_oid = InvalidOid; ParallelSlot *free_slot = NULL; conn = connectDatabase(cparams, progname, echo, false, true); @@ -322,6 +327,7 @@ reindex_one_database(ConnParams *cparams, ReindexType type, case REINDEX_INDEX: case REINDEX_SCHEMA: case REINDEX_TABLE: + process_list = user_list; Assert(user_list != NULL); break; } @@ -330,26 +336,14 @@ reindex_one_database(ConnParams *cparams, ReindexType type, { switch (process_type) { - case REINDEX_DATABASE: - - /* Build a list of relations from the database */ - process_list = get_parallel_object_list(conn, process_type, - user_list, echo); - process_type = REINDEX_TABLE; - - /* Bail out if nothing to process */ - if (process_list == NULL) - { - PQfinish(conn); - return; - } - break; - case REINDEX_SCHEMA: Assert(user_list != NULL); + /* fall through */ - /* Build a list of relations from all the schemas */ - process_list = get_parallel_object_list(conn, process_type, + case REINDEX_DATABASE: + + /* Build a list of relations from the given list */ + process_list = get_parallel_tables_list(conn, process_type, user_list, echo); process_type = REINDEX_TABLE; @@ -362,45 +356,34 @@ reindex_one_database(ConnParams *cparams, ReindexType type, break; case REINDEX_INDEX: - Assert(user_list != NULL); /* - * Build a list of relations from the indices. This will - * accordingly reorder the list of indices too. + * Generate a list of indexes and a matching list of table + * OIDs, based on the user-specified index names. */ - indices_tables_list = get_parallel_object_list(conn, process_type, - user_list, echo); + get_parallel_tabidx_list(conn, user_list, &tableoid_list, + echo); - /* - * Bail out if nothing to process. 'user_list' was modified - * in-place, so check if it has at least one cell. - */ - if (user_list->head == NULL) + /* Bail out if nothing to process */ + if (tableoid_list == NULL) { PQfinish(conn); return; } - /* - * Assuming 'user_list' is not empty, 'indices_tables_list' - * shouldn't be empty as well. - */ - Assert(indices_tables_list != NULL); - indices_tables_cell = indices_tables_list->head; + indices_tables_cell = tableoid_list->head; + process_list = user_list; break; case REINDEX_SYSTEM: /* not supported */ + process_list = NULL; Assert(false); break; case REINDEX_TABLE: - - /* - * Fall through. The list of items for tables is already - * created. - */ + process_list = user_list; break; } } @@ -410,6 +393,7 @@ reindex_one_database(ConnParams *cparams, ReindexType type, * the list. We choose the minimum between the number of concurrent * connections and the number of items in the list. */ + items_count = 0; for (cell = process_list->head; cell; cell = cell->next) { items_count++; @@ -442,15 +426,15 @@ reindex_one_database(ConnParams *cparams, ReindexType type, /* * For parallel index-level REINDEX, the indices of the same table are * ordered together and they are to be processed by the same job. So, - * we don't switch the job as soon as the index belongs to the same - * table as the previous one. + * we don't switch parallel slot until the index belongs to a + * different table as the previous one. */ if (parallel && process_type == REINDEX_INDEX) { - if (prev_index_table_name != NULL && - strcmp(prev_index_table_name, indices_tables_cell->val) == 0) + if (prev_index_table_oid != InvalidOid && + prev_index_table_oid == indices_tables_cell->val) need_new_slot = false; - prev_index_table_name = indices_tables_cell->val; + prev_index_table_oid = indices_tables_cell->val; indices_tables_cell = indices_tables_cell->next; } @@ -482,10 +466,10 @@ finish: pg_free(process_list); } - if (indices_tables_list) + if (tableoid_list) { - simple_string_list_destroy(indices_tables_list); - pg_free(indices_tables_list); + simple_oid_list_destroy(tableoid_list); + pg_free(tableoid_list); } ParallelSlotsTerminate(sa); @@ -623,7 +607,7 @@ run_reindex_command(PGconn *conn, ReindexType type, const char *name, } /* - * Prepare the list of objects to process by querying the catalogs. + * Prepare the list of tables to process by querying the catalogs. * * This function will return a SimpleStringList object containing the entire * list of tables in the given database that should be processed by a parallel @@ -631,15 +615,13 @@ run_reindex_command(PGconn *conn, ReindexType type, const char *name, * table. */ static SimpleStringList * -get_parallel_object_list(PGconn *conn, ReindexType type, +get_parallel_tables_list(PGconn *conn, ReindexType type, SimpleStringList *user_list, bool echo) { PQExpBufferData catalog_query; - PQExpBufferData buf; PGresult *res; SimpleStringList *tables; - int ntups, - i; + int ntups; initPQExpBuffer(&catalog_query); @@ -668,7 +650,6 @@ get_parallel_object_list(PGconn *conn, ReindexType type, case REINDEX_SCHEMA: { SimpleStringListCell *cell; - bool nsp_listed = false; Assert(user_list != NULL); @@ -690,14 +671,10 @@ get_parallel_object_list(PGconn *conn, ReindexType type, for (cell = user_list->head; cell; cell = cell->next) { - const char *nspname = cell->val; + if (cell != user_list->head) + appendPQExpBufferChar(&catalog_query, ','); - if (nsp_listed) - appendPQExpBufferStr(&catalog_query, ", "); - else - nsp_listed = true; - - appendStringLiteralConn(&catalog_query, nspname, conn); + appendStringLiteralConn(&catalog_query, cell->val, conn); } appendPQExpBufferStr(&catalog_query, ")\n" @@ -706,59 +683,6 @@ get_parallel_object_list(PGconn *conn, ReindexType type, break; case REINDEX_INDEX: - { - SimpleStringListCell *cell; - - Assert(user_list != NULL); - - /* - * Straight-forward index-level REINDEX is not supported with - * multiple jobs as we cannot control the concurrent - * processing of multiple indexes depending on the same - * relation. But we can extract the appropriate table name - * for the index and put REINDEX INDEX commands into different - * jobs, according to the parent tables. - * - * We will order the results to group the same tables - * together. We fetch index names as well to build a new list - * of them with matching order. - */ - appendPQExpBufferStr(&catalog_query, - "SELECT t.relname, n.nspname, i.relname\n" - "FROM pg_catalog.pg_index x\n" - "JOIN pg_catalog.pg_class t ON t.oid = x.indrelid\n" - "JOIN pg_catalog.pg_class i ON i.oid = x.indexrelid\n" - "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = t.relnamespace\n" - "WHERE x.indexrelid OPERATOR(pg_catalog.=) ANY(ARRAY['"); - - for (cell = user_list->head; cell; cell = cell->next) - { - if (cell != user_list->head) - appendPQExpBufferStr(&catalog_query, "', '"); - - appendQualifiedRelation(&catalog_query, cell->val, conn, echo); - } - - /* - * Order tables by the size of its greatest index. Within the - * table, order indexes by their sizes. - */ - appendPQExpBufferStr(&catalog_query, - "']::pg_catalog.regclass[])\n" - "ORDER BY max(i.relpages) OVER \n" - " (PARTITION BY n.nspname, t.relname),\n" - " n.nspname, t.relname, i.relpages;\n"); - - /* - * We're going to re-order the user_list to match the order of - * tables. So, empty the user_list to fill it from the query - * result. - */ - simple_string_list_destroy(user_list); - user_list->head = user_list->tail = NULL; - } - break; - case REINDEX_SYSTEM: case REINDEX_TABLE: Assert(false); @@ -781,38 +705,119 @@ get_parallel_object_list(PGconn *conn, ReindexType type, tables = pg_malloc0(sizeof(SimpleStringList)); /* Build qualified identifiers for each table */ - initPQExpBuffer(&buf); - for (i = 0; i < ntups; i++) + for (int i = 0; i < ntups; i++) { - appendPQExpBufferStr(&buf, - fmtQualifiedIdEnc(PQgetvalue(res, i, 1), - PQgetvalue(res, i, 0), - PQclientEncoding(conn))); - - simple_string_list_append(tables, buf.data); - resetPQExpBuffer(&buf); - - if (type == REINDEX_INDEX) - { - /* - * For index-level REINDEX, rebuild the list of indexes to match - * the order of tables list. - */ - appendPQExpBufferStr(&buf, - fmtQualifiedIdEnc(PQgetvalue(res, i, 1), - PQgetvalue(res, i, 2), - PQclientEncoding(conn))); - - simple_string_list_append(user_list, buf.data); - resetPQExpBuffer(&buf); - } + simple_string_list_append(tables, + fmtQualifiedIdEnc(PQgetvalue(res, i, 1), + PQgetvalue(res, i, 0), + PQclientEncoding(conn))); } - termPQExpBuffer(&buf); PQclear(res); return tables; } +/* + * Given a user-specified list of indexes, prepare a matching list + * indexes to process, and also a matching list of table OIDs to which each + * index belongs. The latter is needed to avoid scheduling two parallel tasks + * with concurrent reindexing of indexes on the same table. + * + * On input, index_list is the user-specified index list. table_list is an + * output argument which is filled with a list of the tables to process; on + * output, index_list is a matching reordered list of indexes. Caller is + * supposed to walk both lists in unison. Both pointers will be NULL if + * there's nothing to process. + */ +static void +get_parallel_tabidx_list(PGconn *conn, + SimpleStringList *index_list, + SimpleOidList **table_list, + bool echo) +{ + PQExpBufferData catalog_query; + PGresult *res; + SimpleStringListCell *cell; + int ntups; + + Assert(index_list != NULL); + + initPQExpBuffer(&catalog_query); + + /* + * The queries here are using a safe search_path, so there's no need to + * fully qualify everything. + */ + + /* + * We cannot use REINDEX in parallel in a straightforward way, because + * we'd be unable to control concurrent processing of multiple indexes on + * the same table. But we can extract the table OID together with each + * indexes, to allow scheduling each REINDEX INDEX command on a separate + * job according to the owning table. + * + * We order the results to group all indexes of each table together. + */ + appendPQExpBufferStr(&catalog_query, + "SELECT x.indrelid, n.nspname, i.relname\n" + "FROM pg_catalog.pg_index x\n" + "JOIN pg_catalog.pg_class i ON i.oid = x.indexrelid\n" + "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace\n" + "WHERE x.indexrelid = ANY(ARRAY['"); + + for (cell = index_list->head; cell; cell = cell->next) + { + if (cell != index_list->head) + appendPQExpBufferStr(&catalog_query, "', '"); + + appendQualifiedRelation(&catalog_query, cell->val, conn, echo); + } + + /* + * Order tables by the size of its greatest index. Within each table, + * order indexes by their sizes. + */ + appendPQExpBufferStr(&catalog_query, + "']::pg_catalog.regclass[])\n" + "ORDER BY max(i.relpages) OVER \n" + " (PARTITION BY x.indrelid),\n" + " x.indrelid, i.relpages;\n"); + + /* Empty the original index_list to fill it from the query result. */ + simple_string_list_destroy(index_list); + index_list->head = index_list->tail = NULL; + + res = executeQuery(conn, catalog_query.data, echo); + termPQExpBuffer(&catalog_query); + + /* + * If no rows are returned, there are no matching tables, so we are done. + */ + ntups = PQntuples(res); + if (ntups == 0) + { + PQclear(res); + return; + } + + *table_list = pg_malloc0(sizeof(SimpleOidList)); + + /* + * Build two lists, one with table OIDs and the other with fully-qualified + * index names. + */ + for (int i = 0; i < ntups; i++) + { + simple_oid_list_append(*table_list, atooid(PQgetvalue(res, i, 0))); + simple_string_list_append(index_list, + fmtQualifiedIdEnc(PQgetvalue(res, i, 1), + PQgetvalue(res, i, 2), + PQclientEncoding(conn))); + } + + PQclear(res); +} + static void reindex_all_databases(ConnParams *cparams, const char *progname, bool echo, bool quiet, bool verbose,