On Tue, Apr 08, 2025 at 01:07:09PM -0400, Tom Lane wrote: > Nathan Bossart <nathandboss...@gmail.com> writes: >> I do think it's worth considering going back to copying >> pg_largeobject_metadata's files for upgrades from v16 and newer. > > (If we do this) I don't see why we'd need to stop at v16. I'm > envisioning that we'd use COPY, which will be dealing in the > text representation of aclitems, and I don't think that's changed > in a long time. The sort of thing that would break it is changes > in the set of available/default privilege bits for large objects. > > That is, where the dump currently contains something like > > SELECT pg_catalog.lo_create('2121'); > ALTER LARGE OBJECT 2121 OWNER TO postgres; > GRANT ALL ON LARGE OBJECT 2121 TO joe; > > we'd have > > COPY pg_largeobject_metadata FROM STDIN; > ... > 2121 10 {postgres=rw/postgres,joe=rw/postgres} > ... > > and some appropriate COPY data for pg_shdepend too.
Attached is a proof-of-concept grade patch for using COPY for pg_largeobject_metadata and the relevant pg_shdepend entries. On my laptop, pg_upgrade with 10M LOs (each with a non-bootstrap-superuser owner and another role with SELECT rights) goes from ~8.5 minutes to ~1 minute with this patch. I originally set out to invent a new lo_create_with_owner() function and teach pg_dump to batch those together in large groups, but as I started the required pg_dump surgery, I was quickly scared away by the complexity. Next, I gave COPY a try. The improvements from using COPY will likely be limited to the pg_upgrade case, but that's the only case I regularly hear complaints about for zillions of large objects, so maybe it's good enough for now. For the COPY approach, I modified pg_dump to dump the contents of pg_largeobject_metadata. This is easy enough, but I ran into problems with the dependent comments and security labels. It turns out that even before v12, we run all the lo_create() commands just so that creating the comments and security labels works (AFAICT). So I suspect upgrading with many large objects has always been slow. The comment/security label dependency issue can be fixed (at least well enough for the tests) by moving PRIO_LARGE_OBJECT below PRIO_TABLE_DATA. There might be an existing issue here, because dbObjectTypePriorities has the following comment: * NOTE: object-type priorities must match the section assignments made in * pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY, * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects * must sort between them. But dumpLO() puts large objects in SECTION_DATA, and PRIO_LARGE_OBJECT is before PRIO_PRE_DATA_BOUNDARY. I admittedly haven't spent too much time investigating this, though. In any case, it might be a good idea to also make sure we explicitly mark the large objects and their comments/seclabels as dependent on the pg_largeobject_metadata data. 
That leaves pg_shdepend. For now, I've just instructed pg_upgrade to COPY the relevant pg_shdepend rows as an independent step, but perhaps there's a reasonably straightforward way to put that in pg_dump, too. -- nathan
>From 59fa6558a9258719840e4d423c86313937156f95 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Mon, 5 May 2025 14:11:56 -0500 Subject: [PATCH v1 1/1] pg_upgrade: Use COPY for large object metadata. --- src/bin/pg_dump/pg_backup_archiver.c | 5 +++++ src/bin/pg_dump/pg_dump.c | 16 +++++++++++++++- src/bin/pg_dump/pg_dump_sort.c | 2 +- src/bin/pg_dump/t/002_pg_dump.pl | 4 +++- src/bin/pg_upgrade/dump.c | 9 +++++++++ src/bin/pg_upgrade/pg_upgrade.c | 9 +++++++++ 6 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index afa42337b11..0d61155ec0a 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -2974,6 +2974,11 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) int res = REQ_SCHEMA | REQ_DATA; RestoreOptions *ropt = AH->public.ropt; + if (ropt->binary_upgrade && + strcmp(te->tag, "pg_largeobject_metadata") == 0 && + strcmp(te->namespace, "pg_catalog") == 0) + return REQ_DATA; + /* These items are treated specially */ if (strcmp(te->desc, "ENCODING") == 0 || strcmp(te->desc, "STDSTRINGS") == 0 || diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e2e7975b34e..312e1010456 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1083,6 +1083,20 @@ main(int argc, char **argv) if (!dopt.dumpData && dopt.sequence_data) getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE); + if (dopt.binary_upgrade) + { + for (int i = 0; i < numTables; i++) + { + if (tblinfo[i].relkind == RELKIND_RELATION && + strcmp(tblinfo[i].dobj.name, "pg_largeobject_metadata") == 0 && + strcmp(tblinfo[i].dobj.namespace->dobj.name, "pg_catalog") == 0) + { + makeTableDataInfo(&dopt, &(tblinfo[i])); + break; + } + } + } + /* * In binary-upgrade mode, we do not have to worry about the actual LO * data or the associated metadata that resides in the pg_largeobject and @@ -3925,7 
+3939,7 @@ getLOs(Archive *fout) * pg_largeobject_metadata, after the dump is restored. */ if (dopt->binary_upgrade) - loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA; + loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION); /* * Create a "BLOBS" data item for the group, too. This is just a diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 0b0977788f1..538e7dcb493 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -76,10 +76,10 @@ enum dbObjectTypePriorities PRIO_TABLE_ATTACH, PRIO_DUMMY_TYPE, PRIO_ATTRDEF, - PRIO_LARGE_OBJECT, PRIO_PRE_DATA_BOUNDARY, /* boundary! */ PRIO_TABLE_DATA, PRIO_SEQUENCE_SET, + PRIO_LARGE_OBJECT, PRIO_LARGE_OBJECT_DATA, PRIO_STATISTICS_DATA_DATA, PRIO_POST_DATA_BOUNDARY, /* boundary! */ diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 55d892d9c16..2417f537ac1 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -1029,6 +1029,7 @@ my %tests = ( test_schema_plus_large_objects => 1, }, unlike => { + binary_upgrade => 1, no_large_objects => 1, no_owner => 1, schema_only => 1, @@ -1517,6 +1518,7 @@ my %tests = ( test_schema_plus_large_objects => 1, }, unlike => { + binary_upgrade => 1, schema_only => 1, schema_only_with_statistics => 1, no_large_objects => 1, @@ -4524,9 +4526,9 @@ my %tests = ( no_schema => 1, section_data => 1, test_schema_plus_large_objects => 1, - binary_upgrade => 1, }, unlike => { + binary_upgrade => 1, no_large_objects => 1, no_privs => 1, schema_only => 1, diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 23cb08e8347..e5b73b7e7a0 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -15,6 +15,9 @@ void generate_old_dump(void) { + char *path = psprintf("%s/lo_shdep.out", realpath(log_opts.dumpdir, NULL)); + PQExpBuffer buf = createPQExpBuffer(); + PGconn *conn; int dbnum; prep_status("Creating dump of global 
objects"); @@ -70,5 +73,11 @@ generate_old_dump(void) ; end_progress_output(); + + conn = connectToServer(&old_cluster, "template1"); + appendStringLiteralConn(buf, path, conn); + PQclear(executeQueryOrDie(conn, "COPY (SELECT * FROM pg_shdepend WHERE classid = 'pg_largeobject'::regclass) TO %s", buf->data)); + destroyPQExpBuffer(buf); + PQfinish(conn); check_ok(); } diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 536e49d2616..7a6234d48c0 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -571,6 +571,8 @@ prepare_new_globals(void) static void create_new_objects(void) { + char *path = psprintf("%s/lo_shdep.out", realpath(log_opts.dumpdir, NULL)); + PQExpBuffer buf = createPQExpBuffer(); int dbnum; PGconn *conn_new_template1; @@ -688,6 +690,13 @@ create_new_objects(void) ; end_progress_output(); + + conn_new_template1 = connectToServer(&new_cluster, "template1"); + appendStringLiteralConn(buf, path, conn_new_template1); + PQclear(executeQueryOrDie(conn_new_template1, "COPY pg_shdepend FROM %s", buf->data)); + destroyPQExpBuffer(buf); + PQfinish(conn_new_template1); + check_ok(); /* -- 2.39.5 (Apple Git-154)