On Tue, Apr 01, 2025 at 01:20:30PM -0500, Nathan Bossart wrote:
> On Mon, Mar 31, 2025 at 09:33:15PM -0500, Nathan Bossart wrote:
>> My goal is to commit the attached patches on Friday morning, but of course
>> that is subject to change based on any feedback or objections that emerge
>> in the meantime.
>
> I spent some more time polishing these patches this morning.  There should
> be no functional differences, but I did restructure 0003 to make it even
> simpler.
Apologies for the noise.  I noticed one more way to simplify 0002.  As
before, there should be no functional differences.

--
nathan
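P.S. For anyone skimming 0003 before reading the diff: the heart of it is
replacing the per-relation prepared statement with one that takes name
arrays.  The query ends up with roughly this shape (column list trimmed
for brevity; the schema and table names in the EXECUTE are made up for
illustration):

    PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS
    SELECT s.schemaname, s.tablename, s.attname, s.inherited,
           s.null_frac, s.avg_width, s.n_distinct
      FROM pg_catalog.pg_stats s
      JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord)
        ON s.schemaname = u.schemaname AND s.tablename = u.tablename
     WHERE s.tablename = ANY($2)
     ORDER BY u.ord, s.attname, s.inherited;

    -- hypothetical batch of two relations, public.t1 and public.t2
    EXECUTE getAttributeStats('{public,public}'::pg_catalog.name[],
                              '{t1,t2}'::pg_catalog.name[]);

The ORDER BY on u.ord returns each batch's rows in TOC order, which is
what lets dumpRelationStats_dumper() walk the cached result set
sequentially instead of searching it.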
>From 85e7b507629b096275d3a7386149876af7466f74 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Mon, 31 Mar 2025 10:44:26 -0500
Subject: [PATCH v12n3 1/3] Skip second WriteToc() for custom-format dumps
 without data.

Presently, "pg_dump --format=custom" calls WriteToc() twice.  The second
call is intended to update the data offset information, which allegedly
makes parallel pg_restore significantly faster.  However, if we aren't
dumping any data, this step accomplishes nothing and can be skipped.
This is a preparatory optimization for a follow-up commit that will move
the queries for attribute statistics to WriteToc() to save memory.

Discussion: https://postgr.es/m/Z9c1rbzZegYQTOQE%40nathan
---
 src/bin/pg_dump/pg_backup_custom.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index e44b887eb29..b971e3bc16e 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -755,9 +755,11 @@ _CloseArchive(ArchiveHandle *AH)
 	 * If possible, re-write the TOC in order to update the data offset
 	 * information.  This is not essential, as pg_restore can cope in most
 	 * cases without it; but it can make pg_restore significantly faster
-	 * in some situations (especially parallel restore).
+	 * in some situations (especially parallel restore).  We can skip this
+	 * step if we're not dumping any data.
 	 */
-	if (ctx->hasSeek &&
+	if (AH->public.dopt->dumpData &&
+		ctx->hasSeek &&
 		fseeko(AH->FH, tpos, SEEK_SET) == 0)
 		WriteToc(AH);
 }
-- 
2.39.5 (Apple Git-154)
>From 3804a6e489601bb3ac6770c413c7e9d3865ce524 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Mon, 31 Mar 2025 14:53:11 -0500
Subject: [PATCH v12n3 2/3] pg_dump: Reduce memory usage of dumps with
 statistics.

Right now, pg_dump stores all generated commands for statistics in
memory.  These commands can be quite large and therefore can
significantly increase pg_dump's memory footprint.  To fix, wait until
we are about to write out the commands before generating them, and be
sure to free the commands after writing.  This is implemented via a new
defnDumper callback that works much like the dataDumper one but is
specially designed for TOC entries.

One drawback of this change is that custom dumps that include data will
run the attribute statistics queries twice.  However, a follow-up commit
will add batching for these queries that our testing indicates should
improve dump speed (even when compared to pg_dump before this commit).

Author: Corey Huinker <corey.huin...@gmail.com>
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
---
 src/bin/pg_dump/pg_backup.h          |  1 +
 src/bin/pg_dump/pg_backup_archiver.c | 28 ++++++++++++++++++++++--
 src/bin/pg_dump/pg_backup_archiver.h |  5 ++++
 src/bin/pg_dump/pg_dump.c            | 44 ++++++++++++++++++++--------
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 658986de6f8..781f8fa1cc9 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -285,6 +285,7 @@ typedef int DumpId;
  * Function pointer prototypes for assorted callback methods.
  */
 
+typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
 typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
 typedef void (*SetupWorkerPtrType) (Archive *AH);
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 1d131e5a57d..bb5a02415f2 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -1266,6 +1266,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
 	newToc->dataDumperArg = opts->dumpArg;
 	newToc->hadDumper = opts->dumpFn ? true : false;
 
+	newToc->defnDumper = opts->defnFn;
+	newToc->defnDumperArg = opts->defnArg;
+
 	newToc->formatData = NULL;
 	newToc->dataLength = 0;
 
@@ -2621,7 +2624,17 @@ WriteToc(ArchiveHandle *AH)
 		WriteStr(AH, te->tag);
 		WriteStr(AH, te->desc);
 		WriteInt(AH, te->section);
-		WriteStr(AH, te->defn);
+
+		if (te->defnDumper)
+		{
+			char	   *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+
+			WriteStr(AH, defn);
+			pg_free(defn);
+		}
+		else
+			WriteStr(AH, te->defn);
+
 		WriteStr(AH, te->dropStmt);
 		WriteStr(AH, te->copyStmt);
 		WriteStr(AH, te->namespace);
@@ -3849,7 +3862,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 
 	/*
 	 * Actually print the definition.  Normally we can just print the defn
-	 * string if any, but we have three special cases:
+	 * string if any, but we have four special cases:
 	 *
 	 * 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump
 	 * versions put into CREATE SCHEMA.  Don't mutate the variant for schema
@@ -3862,6 +3875,10 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 	 * 3. ACL LARGE OBJECTS entries need special processing because they
 	 * contain only one copy of the ACL GRANT/REVOKE commands, which we must
 	 * apply to each large object listed in the associated BLOB METADATA.
+	 *
+	 * 4. Entries with a defnDumper need to call it to generate the
+	 * definition.  This is primarily intended to provide a way to save memory
+	 * for objects that need a lot of it (e.g., statistics data).
 	 */
 	if (ropt->noOwner &&
 		strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0)
@@ -3877,6 +3894,13 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
 	{
 		IssueACLPerBlob(AH, te);
 	}
+	else if (te->defnDumper)
+	{
+		char	   *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
+
+		ahprintf(AH, "%s\n\n", defn);
+		pg_free(defn);
+	}
 	else if (te->defn && strlen(te->defn) > 0)
 	{
 		ahprintf(AH, "%s\n\n", te->defn);
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index a2064f471ed..fc65e0e34d3 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -368,6 +368,9 @@ struct _tocEntry
 	const void *dataDumperArg;	/* Arg for above routine */
 	void	   *formatData;		/* TOC Entry data specific to file format */
 
+	DefnDumperPtr defnDumper;	/* Routine to dump create statement */
+	const void *defnDumperArg;	/* Arg for above routine */
+
 	/* working state while dumping/restoring */
 	pgoff_t		dataLength;		/* item's data size; 0 if none or unknown */
 	int			reqs;			/* do we need schema and/or data of object
@@ -407,6 +410,8 @@ typedef struct _archiveOpts
 	int			nDeps;
 	DataDumperPtr dumpFn;
 	const void *dumpArg;
+	DefnDumperPtr defnFn;
+	const void *defnArg;
 } ArchiveOpts;
 #define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
 /* Called to add a TOC entry */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 4ca34be230c..46fb70e0a8b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -10560,13 +10560,16 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
 }
 
 /*
- * dumpRelationStats --
+ * dumpRelationStats_dumper --
  *
- * Dump command to import stats into the relation on the new database.
+ * Generate command to import stats into the relation on the new database.
+ * This routine is called by the Archiver when it wants the statistics to be
+ * dumped.
  */
-static void
-dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
+static char *
+dumpRelationStats_dumper(Archive *fout, const void *userArg)
 {
+	const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
 	const DumpableObject *dobj = &rsinfo->dobj;
 	PGresult   *res;
 	PQExpBuffer query;
@@ -10586,10 +10589,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
 	int			i_range_length_histogram;
 	int			i_range_empty_frac;
 	int			i_range_bounds_histogram;
-
-	/* nothing to do if we are not dumping statistics */
-	if (!fout->dopt->dumpStatistics)
-		return;
+	char	   *ret;
 
 	query = createPQExpBuffer();
 	if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
@@ -10770,17 +10770,37 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
 	PQclear(res);
 
+	destroyPQExpBuffer(query);
+
+	ret = out->data;
+	pg_free(out);
+	return ret;
+}
+
+/*
+ * dumpRelationStats --
+ *
+ * Make an ArchiveEntry for the relation statistics.  The Archiver will take
+ * care of gathering the statistics and generating the restore commands when
+ * they are needed.
+ */
+static void
+dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
+{
+	const DumpableObject *dobj = &rsinfo->dobj;
+
+	/* nothing to do if we are not dumping statistics */
+	if (!fout->dopt->dumpStatistics)
+		return;
+
 	ArchiveEntry(fout, nilCatalogId, createDumpId(),
 				 ARCHIVE_OPTS(.tag = dobj->name,
 							  .namespace = dobj->namespace->dobj.name,
 							  .description = "STATISTICS DATA",
 							  .section = rsinfo->section,
-							  .createStmt = out->data,
+							  .defnFn = dumpRelationStats_dumper,
+							  .defnArg = rsinfo,
 							  .deps = dobj->dependencies,
 							  .nDeps = dobj->nDeps));
-
-	destroyPQExpBuffer(out);
-	destroyPQExpBuffer(query);
 }
 
 /*
-- 
2.39.5 (Apple Git-154)
>From 3c7ff6e0eea0ed6435e17160df67227f3d43d7cf Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Tue, 1 Apr 2025 11:35:19 -0500
Subject: [PATCH v12n3 3/3] pg_dump: Retrieve attribute statistics in batches.

Currently, pg_dump gathers attribute statistics with a query per
relation, which can cause pg_dump to take significantly longer,
especially when there are many tables.  This commit improves matters by
gathering attribute statistics for 64 relations at a time.  Some simple
testing showed this was the ideal batch size, but performance may vary
depending on workload.

To construct the next set of relations for the query, we scan through
the TOC list for relevant entries.  Ordinarily, we can stop issuing
queries once we reach the end of the list.  However, custom-format dumps
that include data run the statistics queries twice (thanks to commit
XXXXXXXXXX), so we allow a second pass in that case.  Our tests showed
that batching more than makes up for any losses from running the queries
twice.

This change increases the memory usage of pg_dump a bit, but that isn't
expected to be too egregious and is arguably well worth the trade-off.

Author: Corey Huinker <corey.huin...@gmail.com>
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
---
 src/bin/pg_dump/pg_dump.c | 111 +++++++++++++++++++++++++++++++-------
 1 file changed, 93 insertions(+), 18 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 46fb70e0a8b..5dfa12e2ce9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -209,6 +209,9 @@ static int	nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int	nsequences = 0;
 
+/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
+#define MAX_ATTR_STATS_RELS 64
+
 /*
  * The default number of rows per INSERT when
  * --inserts is specified without --rows-per-insert
@@ -10559,6 +10562,70 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
 	appendPQExpBuffer(out, "::%s", argtype);
 }
 
+/*
+ * fetchAttributeStats --
+ *
+ * Fetch next batch of rows for getAttributeStats().
+ */
+static PGresult *
+fetchAttributeStats(Archive *fout)
+{
+	ArchiveHandle *AH = (ArchiveHandle *) fout;
+	PQExpBuffer nspnames = createPQExpBuffer();
+	PQExpBuffer relnames = createPQExpBuffer();
+	int			count = 0;
+	PGresult   *res = NULL;
+	static bool restarted;
+	static TocEntry *te;
+
+	/* If we're just starting, set our TOC pointer. */
+	if (!te)
+		te = AH->toc->next;
+
+	/*
+	 * Restart the TOC scan once for custom-format dumps that include data.
+	 * This is necessary because we'll call WriteToc() twice in that case.
+	 */
+	if (!restarted && te == AH->toc &&
+		AH->format == archCustom && fout->dopt->dumpData)
+	{
+		te = AH->toc->next;
+		restarted = true;
+	}
+
+	/* Scan the TOC for the next set of relevant stats entries. */
+	for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
+	{
+		if (te->reqs && strcmp(te->desc, "STATISTICS DATA") == 0)
+		{
+			RelStatsInfo *rsinfo = (RelStatsInfo *) te->defnDumperArg;
+
+			appendPQExpBuffer(nspnames, "%s%s", count ? "," : "",
+							  fmtId(rsinfo->dobj.namespace->dobj.name));
+			appendPQExpBuffer(relnames, "%s%s", count ? "," : "",
+							  fmtId(rsinfo->dobj.name));
+			count++;
+		}
+	}
+
+	/* Execute the query for the next batch of relations. */
+	if (count > 0)
+	{
+		PQExpBuffer query = createPQExpBuffer();
+
+		appendPQExpBuffer(query, "EXECUTE getAttributeStats("
+						  "'{%s}'::pg_catalog.name[],"
+						  "'{%s}'::pg_catalog.name[])",
+						  nspnames->data, relnames->data);
+		res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+		destroyPQExpBuffer(query);
+	}
+
+	destroyPQExpBuffer(nspnames);
+	destroyPQExpBuffer(relnames);
+	return res;
+}
+
 /*
  * dumpRelationStats_dumper --
  *
@@ -10571,9 +10638,12 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 {
 	const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
 	const DumpableObject *dobj = &rsinfo->dobj;
-	PGresult   *res;
+	static PGresult *res;
+	static int	rownum;
 	PQExpBuffer query;
 	PQExpBuffer out;
+	int			i_schemaname;
+	int			i_tablename;
 	int			i_attname;
 	int			i_inherited;
 	int			i_null_frac;
@@ -10595,8 +10665,8 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 	if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
 	{
 		appendPQExpBufferStr(query,
-							 "PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
-							 "SELECT s.attname, s.inherited, "
+							 "PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
+							 "SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
							 "s.null_frac, s.avg_width, s.n_distinct, "
 							 "s.most_common_vals, s.most_common_freqs, "
 							 "s.histogram_bounds, s.correlation, "
@@ -10616,9 +10686,11 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 
 		appendPQExpBufferStr(query,
 							 "FROM pg_catalog.pg_stats s "
-							 "WHERE s.schemaname = $1 "
-							 "AND s.tablename = $2 "
-							 "ORDER BY s.attname, s.inherited");
+							 "JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
+							 "ON s.schemaname = u.schemaname "
+							 "AND s.tablename = u.tablename "
+							 "WHERE s.tablename = ANY($2) "
+							 "ORDER BY u.ord, s.attname, s.inherited");
 
 		ExecuteSqlStatement(fout, query->data);
 
@@ -10648,16 +10720,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 	appendPQExpBufferStr(out, "\n);\n");
 
+	/* Fetch the next batch of attribute statistics if needed. */
+	if (rownum >= PQntuples(res))
+	{
+		PQclear(res);
+		res = fetchAttributeStats(fout);
+		rownum = 0;
+	}
 
-	/* fetch attribute stats */
-	appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
-	appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
-	appendPQExpBufferStr(query, ", ");
-	appendStringLiteralAH(query, dobj->name, fout);
-	appendPQExpBufferStr(query, ");");
-
-	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
-
+	i_schemaname = PQfnumber(res, "schemaname");
+	i_tablename = PQfnumber(res, "tablename");
 	i_attname = PQfnumber(res, "attname");
 	i_inherited = PQfnumber(res, "inherited");
 	i_null_frac = PQfnumber(res, "null_frac");
@@ -10675,10 +10747,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 	i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
 
 	/* restore attribute stats */
-	for (int rownum = 0; rownum < PQntuples(res); rownum++)
+	for (; rownum < PQntuples(res); rownum++)
 	{
 		const char *attname;
 
+		/* Stop if the next stat row in our cache isn't for this relation. */
+		if (strcmp(dobj->name, PQgetvalue(res, rownum, i_tablename)) != 0 ||
+			strcmp(dobj->namespace->dobj.name, PQgetvalue(res, rownum, i_schemaname)) != 0)
+			break;
+
 		appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
 		appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
 						  fout->remoteVersion);
@@ -10768,8 +10845,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
 		appendPQExpBufferStr(out, "\n);\n");
 	}
 
-	PQclear(res);
-
 	destroyPQExpBuffer(query);
 	ret = out->data;
 	pg_free(out);
-- 
2.39.5 (Apple Git-154)
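P.P.S. In case the unnest() join in 0003 looks unfamiliar: passing two
arrays to unnest() in the FROM clause zips them together element by
element, and WITH ORDINALITY adds a column holding the original array
position, which is what the ORDER BY key above relies on.  A toy example
(schema/table names invented):

    SELECT u.ord, u.schemaname, u.tablename
      FROM unnest('{s1,s2}'::pg_catalog.name[],
                  '{t1,t2}'::pg_catalog.name[])
           WITH ORDINALITY AS u (schemaname, tablename, ord);

    -- ord | schemaname | tablename
    -- ----+------------+-----------
    --   1 | s1         | t1
    --   2 | s2         | t2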