On Mon, May 05, 2025 at 02:23:25PM -0500, Nathan Bossart wrote:
> That leaves pg_shdepend.  For now, I've just instructed pg_upgrade to COPY
> the relevant pg_shdepend rows as an independent step, but perhaps there's a
> reasonably straightforward way to put that in pg_dump, too.
It turns out there is.  TableDataInfo's filtercond field can be used to
easily add a WHERE clause to the data dumping command.  On my laptop,
upgrading with --jobs=8 with 10M large objects evenly distributed across 10
databases (each with a non-bootstrap-superuser owner and another role with
select rights) takes ~100 seconds without this patch and ~30 seconds with it.

I've also added dependency tracking, version checks (this only works for
upgrades from >=v12 for now), a hack to ensure the columns for
pg_largeobject_metadata/pg_shdepend are collected, and comments.  I'm sure
there's something I've missed, but this patch has worked well in my tests
thus far.

Taking a step back, I'm a little disappointed in the gains here.  A 3-9x
speedup is nice, but I guess I was hoping to find another order of magnitude
somewhere.  To do any better, I think we'd need to copy the files for
pg_largeobject_metadata directly for upgrades from >= v16, but that would
have to fit somewhere between when pg_restore creates the database and when
it restores any large object comments/seclabels.  I'm not wild about the
amount of hackery required to get that working.

--
nathan
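For reference, the core of the pg_dump.c change is outlined below before the
full patch.  This is a condensed sketch, not the exact hunk: the
lo_metadata_dumpId bookkeeping used later for large object comments/seclabels
and the surrounding code are omitted, and the dependency is added directly
from the dump ID.  It just registers the two catalogs as ordinary table-data
objects and hangs the WHERE clause off pg_shdepend's TableDataInfo:

	/* Outline of the pg_dump.c change (see the attached patch for the real version). */
	if (dopt.binary_upgrade && fout->remoteVersion >= 120000)
	{
		TableInfo  *lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
		TableInfo  *shdepend = findTableByOid(SharedDependRelationId);

		/* dump both catalogs as plain TABLE DATA objects */
		makeTableDataInfo(&dopt, lo_metadata);
		makeTableDataInfo(&dopt, shdepend);

		/* restore pg_shdepend's rows after pg_largeobject_metadata's */
		addObjectDependency(&shdepend->dataObj->dobj,
							lo_metadata->dataObj->dobj.dumpId);

		/* filtercond supplies the WHERE clause for the catalog's data dump */
		shdepend->dataObj->filtercond =
			"WHERE classid = 'pg_largeobject'::regclass "
			"AND dbid = (SELECT oid FROM pg_database "
			"            WHERE datname = current_database())";
	}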
>From b14b74a4ad3ffff60c2fcb3c241766a651d681c5 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Wed, 7 May 2025 09:06:05 -0500
Subject: [PATCH v2 1/1] pg_upgrade: Use COPY for large object metadata.

Reported-by: Hannu Krosing <han...@google.com>
Suggested-by: Tom Lane <t...@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAMT0RQSS-6qLH%2BzYsOeUbAYhop3wmQTkNmQpo5--QRDUR%2BqYmQ%40mail.gmail.com
---
 src/bin/pg_dump/pg_backup_archiver.c | 13 +++++
 src/bin/pg_dump/pg_dump.c            | 78 ++++++++++++++++++++++++++--
 src/bin/pg_dump/pg_dump_sort.c       |  2 +-
 src/bin/pg_dump/t/002_pg_dump.pl     |  4 +-
 4 files changed, 90 insertions(+), 7 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index afa42337b11..4c387b0cb5e 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -31,6 +31,8 @@
 #endif
 
 #include "catalog/pg_class_d.h"
+#include "catalog/pg_largeobject_metadata.h"
+#include "catalog/pg_shdepend.h"
 #include "common/string.h"
 #include "compress_io.h"
 #include "dumputils.h"
@@ -2974,6 +2976,17 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 	int			res = REQ_SCHEMA | REQ_DATA;
 	RestoreOptions *ropt = AH->public.ropt;
 
+	/*
+	 * For binary upgrade mode, dump pg_largeobject_metadata and the
+	 * associated pg_shdepend rows.  This is faster to restore than the
+	 * equivalent set of large object commands.
+	 */
+	if (ropt->binary_upgrade && AH->public.remoteVersion >= 120000 &&
+		strcmp(te->desc, "TABLE DATA") == 0 &&
+		(te->catalogId.oid == LargeObjectMetadataRelationId ||
+		 te->catalogId.oid == SharedDependRelationId))
+		return REQ_DATA;
+
 	/* These items are treated specially */
 	if (strcmp(te->desc, "ENCODING") == 0 ||
 		strcmp(te->desc, "STDSTRINGS") == 0 ||
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index e2e7975b34e..1666a4ec40f 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -49,8 +49,10 @@
 #include "catalog/pg_class_d.h"
 #include "catalog/pg_default_acl_d.h"
 #include "catalog/pg_largeobject_d.h"
+#include "catalog/pg_largeobject_metadata_d.h"
 #include "catalog/pg_proc_d.h"
 #include "catalog/pg_publication_d.h"
+#include "catalog/pg_shdepend.h"
 #include "catalog/pg_subscription_d.h"
 #include "catalog/pg_type_d.h"
 #include "common/connect.h"
@@ -209,6 +211,12 @@ static int	nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int	nsequences = 0;
 
+/*
+ * For binary upgrade, the dump ID of pg_largeobject_metadata is saved for use
+ * as a dependency for pg_shdepend and any large object comments/seclabels.
+ */
+static DumpId lo_metadata_dumpId;
+
 /* Maximum number of relations to fetch in a fetchAttributeStats() call. */
 #define MAX_ATTR_STATS_RELS 64
 
@@ -1083,6 +1091,34 @@ main(int argc, char **argv)
 	if (!dopt.dumpData && dopt.sequence_data)
 		getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE);
 
+	/*
+	 * For binary upgrade mode, dump pg_largeobject_metadata and the
+	 * associated pg_shdepend rows.  This is faster to restore than the
+	 * equivalent set of large object commands.
+	 */
+	if (dopt.binary_upgrade && fout->remoteVersion >= 120000)
+	{
+		TableInfo  *lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
+		TableInfo  *shdepend = findTableByOid(SharedDependRelationId);
+
+		makeTableDataInfo(&dopt, lo_metadata);
+		makeTableDataInfo(&dopt, shdepend);
+
+		/*
+		 * Save pg_largeobject_metadata's dump ID for use as a dependency on
+		 * pg_shdepend and any large object comments/seclabels.
+		 */
+		lo_metadata_dumpId = lo_metadata->dataObj->dobj.dumpId;
+		addObjectDependency(&shdepend->dataObj->dobj, lo_metadata_dumpId);
+
+		/*
+		 * Only dump large object shdepend rows for this database.
+		 */
+		shdepend->dataObj->filtercond = "WHERE classid = 'pg_largeobject'::regclass "
+										"AND dbid = (SELECT oid FROM pg_database "
+										"            WHERE datname = current_database())";
+	}
+
 	/*
 	 * In binary-upgrade mode, we do not have to worry about the actual LO
 	 * data or the associated metadata that resides in the pg_largeobject and
@@ -3922,10 +3958,29 @@ getLOs(Archive *fout)
 		 * as it will be copied by pg_upgrade, which simply copies the
 		 * pg_largeobject table. We *do* however dump out anything but the
 		 * data, as pg_upgrade copies just pg_largeobject, but not
-		 * pg_largeobject_metadata, after the dump is restored.
+		 * pg_largeobject_metadata, after the dump is restored.  In versions
+		 * before v12, this is done via proper large object commands.  In
+		 * newer versions, we dump the content of pg_largeobject_metadata and
+		 * any associated pg_shdepend rows, which is faster to restore.
 		 */
 		if (dopt->binary_upgrade)
-			loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
+		{
+			if (fout->remoteVersion >= 120000)
+			{
+				loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION);
+
+				/*
+				 * Mark the large object as dependent on
+				 * pg_largeobject_metadata so that any large object
+				 * comments/seclabels are dumped after it.
+				 */
+				loinfo->dobj.dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
+				loinfo->dobj.dependencies[0] = lo_metadata_dumpId;
+				loinfo->dobj.nDeps = loinfo->dobj.allocDeps = 1;
+			}
+			else
+				loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
+		}
 
 		/*
 		 * Create a "BLOBS" data item for the group, too. This is just a
@@ -9034,8 +9089,18 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
 		if (tbinfo->relkind == RELKIND_SEQUENCE)
 			continue;
 
-		/* Don't bother with uninteresting tables, either */
-		if (!tbinfo->interesting)
+		/*
+		 * Don't bother with uninteresting tables, either.  For binary
+		 * upgrades, this is bypassed for pg_largeobject_metadata and
+		 * pg_shdepend so that the column names are collected for the
+		 * corresponding COPY commands.  Restoring the data for those catalogs
+		 * is faster than restoring the equivalent set of large object
+		 * commands.
+		 */
+		if (!tbinfo->interesting &&
+			!(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+			  (tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
+			   tbinfo->dobj.catId.oid == SharedDependRelationId)))
 			continue;
 
 		/* OK, we need info for this table */
@@ -9232,7 +9297,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
 			pg_fatal("unrecognized table OID %u", attrelid);
 		/* cross-check that we only got requested tables */
 		if (tbinfo->relkind == RELKIND_SEQUENCE ||
-			!tbinfo->interesting)
+			(!tbinfo->interesting &&
+			 !(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+			   (tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
+				tbinfo->dobj.catId.oid == SharedDependRelationId))))
 			pg_fatal("unexpected column data for table \"%s\"",
 					 tbinfo->dobj.name);
 
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 0b0977788f1..538e7dcb493 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -76,10 +76,10 @@ enum dbObjectTypePriorities
 	PRIO_TABLE_ATTACH,
 	PRIO_DUMMY_TYPE,
 	PRIO_ATTRDEF,
-	PRIO_LARGE_OBJECT,
 	PRIO_PRE_DATA_BOUNDARY,		/* boundary! */
 	PRIO_TABLE_DATA,
 	PRIO_SEQUENCE_SET,
+	PRIO_LARGE_OBJECT,
 	PRIO_LARGE_OBJECT_DATA,
 	PRIO_STATISTICS_DATA_DATA,
 	PRIO_POST_DATA_BOUNDARY,	/* boundary! */
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 55d892d9c16..2417f537ac1 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1029,6 +1029,7 @@ my %tests = (
 			test_schema_plus_large_objects => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			no_large_objects => 1,
 			no_owner => 1,
 			schema_only => 1,
@@ -1517,6 +1518,7 @@ my %tests = (
 			test_schema_plus_large_objects => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			schema_only => 1,
 			schema_only_with_statistics => 1,
 			no_large_objects => 1,
@@ -4524,9 +4526,9 @@ my %tests = (
 			no_schema => 1,
 			section_data => 1,
 			test_schema_plus_large_objects => 1,
-			binary_upgrade => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			no_large_objects => 1,
 			no_privs => 1,
 			schema_only => 1,
-- 
2.39.5 (Apple Git-154)