On Tue, 2025-03-11 at 11:26 -0400, Tom Lane wrote: > Right, that was what I was thinking, but hadn't had time to look in > detail. The postDataBound dependency isn't real helpful here, we > could lose that if we had the data dependency.
Attached a patch. It's a bit messier than I expected, so I'm open to other suggestions. The reason is because materialized view data is also pushed to RESTORE_PASS_POST_ACL, so we need to do the same for the statistics (otherwise the dependency is just ignored). Regards, Jeff Davis
From 4e84889cb890e5e855559191e6ca8d152e2495e0 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Tue, 11 Mar 2025 13:51:25 -0700 Subject: [PATCH v1] Make MV statistics depend on MV data. REFRESH MATERIALIZED VIEW replaces the storage, which resets statistics, so statistics must be restored afterward. If statistics are for a materialized view, and if the materialized view data is being dumped, create a dependency from the former referring to the latter. Defer the statistics to SECTION_POST_DATA, which is where the materialized view data goes. In that case, it also requires deferring the statistics to RESTORE_PASS_POST_ACL. Reported-by: Ashutosh Bapat <ashutosh.bapat....@gmail.com> Discussion: https://postgr.es/m/caexhw5s47kmubpbbrjzsm-zfe0tj2o3gbagb7yaye8rq-v2...@mail.gmail.com --- src/bin/pg_dump/pg_backup_archiver.c | 46 ++++++++---- src/bin/pg_dump/pg_dump.c | 104 +++++++++++++++------------ src/bin/pg_dump/pg_dump.h | 3 +- src/bin/pg_dump/pg_dump_sort.c | 2 +- 4 files changed, 95 insertions(+), 60 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 7480e122b61..ace2f203497 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te); static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te); static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te); static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH); -static RestorePass _tocEntryRestorePass(TocEntry *te); +static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te); static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); @@ -102,7 +102,8 @@ static void pending_list_append(TocEntry *l, TocEntry *te); static void pending_list_remove(TocEntry *te); static int TocEntrySizeCompareQsort(const void *p1, const void *p2); static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg); -static void move_to_ready_heap(TocEntry *pending_list, +static void move_to_ready_heap(ArchiveHandle *AH, + TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass); static TocEntry *pop_next_work_item(binaryheap *ready_heap, @@ -747,7 +748,7 @@ RestoreArchive(Archive *AHX) if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0) continue; /* ignore if not to be dumped at all */ - switch (_tocEntryRestorePass(te)) + switch (_tocEntryRestorePass(AH, te)) { case RESTORE_PASS_MAIN: (void) restore_toc_entry(AH, te, false); @@ -766,7 +767,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(te) == RESTORE_PASS_ACL) + _tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -776,7 +777,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL) + _tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -3212,7 +3213,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) * See notes with the RestorePass typedef in pg_backup_archiver.h. */ static RestorePass -_tocEntryRestorePass(TocEntry *te) +_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te) { /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */ if (strcmp(te->desc, "ACL") == 0 || @@ -3233,6 +3234,26 @@ _tocEntryRestorePass(TocEntry *te) strncmp(te->tag, "EVENT TRIGGER ", 14) == 0) return RESTORE_PASS_POST_ACL; + /* + * If statistics data is dependent on materialized view data, it must be + * deferred to RESTORE_PASS_POST_ACL. + */ + if (strcmp(te->desc, "STATISTICS DATA") == 0) + { + for (int i = 0; i < te->nDeps; i++) + { + DumpId depid = te->dependencies[i]; + + if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL) + { + TocEntry *otherte = AH->tocsByDumpId[depid]; + + if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0) + return RESTORE_PASS_POST_ACL; + } + } + } + /* All else can be handled in the main pass. */ return RESTORE_PASS_MAIN; } @@ -4242,7 +4263,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list) * not set skipped_some in this case, since by assumption no main-pass * items could depend on these. */ - if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN) + if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN) do_now = false; if (do_now) @@ -4324,7 +4345,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, * process in the current restore pass. */ AH->restorePass = RESTORE_PASS_MAIN; - move_to_ready_heap(pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); /* * main parent loop @@ -4373,7 +4394,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, /* Advance to next restore pass */ AH->restorePass++; /* That probably allows some stuff to be made ready */ - move_to_ready_heap(pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); /* Loop around to see if anything's now ready */ continue; } @@ -4544,7 +4565,8 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg) * which applies the same logic one-at-a-time.) */ static void -move_to_ready_heap(TocEntry *pending_list, +move_to_ready_heap(ArchiveHandle *AH, + TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass) { @@ -4557,7 +4579,7 @@ move_to_ready_heap(TocEntry *pending_list, next_te = te->pending_next; if (te->depCount == 0 && - _tocEntryRestorePass(te) == pass) + _tocEntryRestorePass(AH, te) == pass) { /* Remove it from pending_list ... */ pending_list_remove(te); @@ -4951,7 +4973,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, * memberships changed. */ if (otherte->depCount == 0 && - _tocEntryRestorePass(otherte) == AH->restorePass && + _tocEntryRestorePass(AH, otherte) == AH->restorePass && otherte->pending_prev != NULL && ready_heap != NULL) { diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index c371570501a..7b442d8391d 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2964,6 +2964,21 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) tbinfo->dataObj = tdinfo; + /* + * If statistics and data are being dumped for a materialized view, the + * statistics depend on the materialized view data. That's because REFRESH + * MATERIALIZED VIEW will completely replace the storage and reset the + * stats. + * + * The dependency is added here because the statistics objects are created + * first. + */ + if (tbinfo->relkind == RELKIND_MATVIEW && tbinfo->stats != NULL) + { + tbinfo->stats->section = SECTION_POST_DATA; + addObjectDependency(&tbinfo->stats->dobj, tdinfo->dobj.dumpId); + } + /* Make sure that we'll collect per-column info for this table. */ tbinfo->interesting = true; } @@ -6850,7 +6865,34 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, info->relkind = relkind; info->indAttNames = indAttNames; info->nindAttNames = nindAttNames; - info->postponed_def = false; + + /* + * Ordinarily, stats go in SECTION_DATA for tables and + * SECTION_POST_DATA for indexes. + * + * However, the section may be updated later for materialized views. + * Matview stats depend on the matview data, because REFRESH + * MATERIALIZED VIEW replaces the storage and resets the stats, and + * the matview data is in SECTION_POST_DATA. Also, the materialized + * view definition may be postponed from SECTION_PRE_DATA to + * SECTION_POST_DATA to resolve some kinds of dependency problems (see + * repairMatViewBoundaryMultiLoop()). + */ + switch (info->relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + case RELKIND_MATVIEW: + info->section = SECTION_DATA; + break; + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + info->section = SECTION_POST_DATA; + break; + default: + pg_fatal("cannot dump statistics for relation kind '%c'", + info->relkind); + } return info; } @@ -7249,9 +7291,17 @@ getTables(Archive *fout, int *numTables) /* Add statistics */ if (tblinfo[i].interesting) - getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages, - PQgetvalue(res, i, i_reltuples), - relallvisible, tblinfo[i].relkind, NULL, 0); + { + RelStatsInfo *stats; + + stats = getRelationStatistics(fout, &tblinfo[i].dobj, + tblinfo[i].relpages, + PQgetvalue(res, i, i_reltuples), + relallvisible, + tblinfo[i].relkind, NULL, 0); + if (tblinfo[i].relkind == RELKIND_MATVIEW) + tblinfo[i].stats = stats; + } /* * Read-lock target tables to make sure they aren't DROPPED or altered @@ -10448,34 +10498,6 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, appendPQExpBuffer(out, "::%s", argtype); } -/* - * Decide which section to use based on the relkind of the parent object. - * - * NB: materialized views may be postponed from SECTION_PRE_DATA to - * SECTION_POST_DATA to resolve some kinds of dependency problems. If so, the - * matview stats will also be postponed to SECTION_POST_DATA. See - * repairMatViewBoundaryMultiLoop(). - */ -static teSection -statisticsDumpSection(const RelStatsInfo *rsinfo) -{ - switch (rsinfo->relkind) - { - case RELKIND_RELATION: - case RELKIND_PARTITIONED_TABLE: - case RELKIND_MATVIEW: - return SECTION_DATA; - case RELKIND_INDEX: - case RELKIND_PARTITIONED_INDEX: - return SECTION_POST_DATA; - default: - pg_fatal("cannot dump statistics for relation kind '%c'", - rsinfo->relkind); - } - - return 0; /* keep compiler quiet */ -} - /* * dumpRelationStats -- * @@ -10488,8 +10510,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) PGresult *res; PQExpBuffer query; PQExpBuffer out; - DumpId *deps = NULL; - int ndeps = 0; char *qualified_name; int i_attname; int i_inherited; @@ -10511,13 +10531,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) if (!fout->dopt->dumpStatistics) return; - /* dependent on the relation definition, if doing schema */ - if (fout->dopt->dumpSchema) - { - deps = dobj->dependencies; - ndeps = dobj->nDeps; - } - query = createPQExpBuffer(); if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) { @@ -10690,11 +10703,10 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) ARCHIVE_OPTS(.tag = dobj->name, .namespace = dobj->namespace->dobj.name, .description = "STATISTICS DATA", - .section = rsinfo->postponed_def ? - SECTION_POST_DATA : statisticsDumpSection(rsinfo), + .section = rsinfo->section, .createStmt = out->data, - .deps = deps, - .nDeps = ndeps)); + .deps = dobj->dependencies, + .nDeps = dobj->nDeps)); free(qualified_name); destroyPQExpBuffer(out); @@ -19383,7 +19395,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, break; case DO_REL_STATS: /* stats section varies by parent object type, DATA or POST */ - if (statisticsDumpSection((RelStatsInfo *) dobj) == SECTION_DATA) + if (((RelStatsInfo *) dobj)->section == SECTION_DATA) { addObjectDependency(dobj, preDataBound->dumpId); addObjectDependency(postDataBound, dobj->dumpId); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index bbdb30b5f54..70f7a369e4a 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -369,6 +369,7 @@ typedef struct _tableInfo bool *notnull_islocal; /* true if NOT NULL has local definition */ struct _attrDefInfo **attrdefs; /* DEFAULT expressions */ struct _constraintInfo *checkexprs; /* CHECK constraints */ + struct _relStatsInfo *stats; /* only set for matviews */ bool needs_override; /* has GENERATED ALWAYS AS IDENTITY */ char *amname; /* relation access method */ @@ -449,7 +450,7 @@ typedef struct _relStatsInfo */ char **indAttNames; /* attnames of the index, in order */ int32 nindAttNames; /* number of attnames stored (can be 0) */ - bool postponed_def; /* stats must be postponed into post-data */ + teSection section; /* stats may appear in data or post-data */ } RelStatsInfo; typedef struct _statsExtInfo diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index f75e9928bad..0b0977788f1 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -820,7 +820,7 @@ repairMatViewBoundaryMultiLoop(DumpableObject *boundaryobj, RelStatsInfo *nextinfo = (RelStatsInfo *) nextobj; if (nextinfo->relkind == RELKIND_MATVIEW) - nextinfo->postponed_def = true; + nextinfo->section = SECTION_POST_DATA; } } -- 2.34.1