On Wed, Jul 06, 2022 at 08:25:04AM -0400, Robert Haas wrote:
> On Wed, Jul 6, 2022 at 7:56 AM Justin Pryzby <pry...@telsasoft.com> wrote:
> > I'm looking into it, but it'd help to hear suggestions about where to
> > put it.  My current ideas aren't very good.
> 
> In main() there is a comment that begins "Most failures happen in
> create_new_objects(), which has just completed at this point."  I am
> thinking you might want to insert a new function call just before that
> comment, like remove_orphaned_files() or tidy_up_new_cluster().
> 
> Another option could be to do something at the beginning of
> transfer_all_new_tablespaces().
That seems like the better option, since it has access to the cluster's
filenodes.  I checked upgrades from 9.2, upgrades with and without VACUUM
FULL, and upgrades with a DB tablespace.

Maybe it's a good idea to check that the file is empty before unlinking...
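For concreteness, here's a rough sketch of the check I have in mind
(untested; check_file_empty() is a made-up name, not an existing pg_upgrade
function, and it assumes pg_upgrade's usual headers for bool and pg_fatal()).
The stray files left behind by initdb should always be zero length, so a
nonempty file would mean we're about to unlink real data and should fail
instead:

	#include <sys/stat.h>

	/*
	 * Hypothetical helper: return true only if the file at "path" exists
	 * and has zero length.  The stray pg_largeobject files created by
	 * initdb should always be empty; a nonempty file would mean we are
	 * about to unlink real data.
	 */
	static bool
	check_file_empty(const char *path)
	{
		struct stat st;

		if (stat(path, &st) != 0)
			return false;		/* missing or unreadable: don't unlink */

		return st.st_size == 0;
	}

remove_stray_largeobject_files() would then guard each unlink() with
something like:

	if (!check_file_empty(largeobj_file))
		pg_fatal("stray file %s is not empty\n", largeobj_file);

-- 
Justin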
From dc4795c66916949493d51ff245c8e1509aaac05e Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 2 Jul 2022 00:48:47 -0500
Subject: [PATCH] fix "Preserve relfilenodes" for pg_largeobject and its index

initdb creates an empty pg_largeobject table, and the new cluster's
relfilenode still pointed at it, so pg_largeobject was empty after
binary upgrades.

Test like:
| ./tmp_install/usr/local/pgsql/bin/initdb --no-sync -D pg15b1.dat -k
| ./tmp_install/usr/local/pgsql/bin/postgres -D pg15b1.dat -c logging_collector=no -p 5678 -k /tmp&
| psql -h /tmp postgres -p 5678 -c '\lo_import /etc/shells' -c 'VACUUM FULL pg_largeobject'
| rm -fr pg15b2.dat && ./tmp_install/usr/local/pgsql/bin/initdb --no-sync -k -D pg15b2.dat && ./tmp_install/usr/local/pgsql/bin/pg_upgrade -d pg15b1.dat -D pg15b2.dat -b ./tmp_install/usr/local/pgsql/bin
| ./tmp_install/usr/local/pgsql/bin/postgres -D pg15b2.dat -c logging_collector=no -p 5678 -k /tmp&

See also: 9a974cbcba005256a19991203583a94b4f9a21a9
https://www.postgresql.org/message-id/20210601140949.GC22012%40momjian.us
https://www.postgresql.org/message-id/20220701231413.GI13040%40telsasoft.com
---
 src/bin/pg_dump/pg_dump.c              | 37 ++++++++------
 src/bin/pg_upgrade/relfilenumber.c     | 68 +++++++++++++++++++++++---
 src/bin/pg_upgrade/t/002_pg_upgrade.pl |  2 +
 3 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 227cdca2e2d..06d436f2f34 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3135,7 +3135,7 @@ dumpDatabase(Archive *fout)
 
 	/*
 	 * pg_largeobject comes from the old system intact, so set its
-	 * relfrozenxids and relminmxids.
+	 * relfrozenxids, relminmxids and relfilenode.
 	 */
 	if (dopt->binary_upgrade)
 	{
@@ -3143,34 +3143,41 @@ dumpDatabase(Archive *fout)
 		PQExpBuffer loFrozenQry = createPQExpBuffer();
 		PQExpBuffer loOutQry = createPQExpBuffer();
 		int			i_relfrozenxid,
+					i_relfilenode,
+					i_oid,
 					i_relminmxid;
 
 		/*
 		 * pg_largeobject
 		 */
 		if (fout->remoteVersion >= 90300)
-			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid\n"
+			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid, relfilenode, oid\n"
							  "FROM pg_catalog.pg_class\n"
-							  "WHERE oid = %u;\n",
-							  LargeObjectRelationId);
+							  "WHERE oid IN (%u, %u);\n",
+							  LargeObjectRelationId, LargeObjectLOidPNIndexId);
 		else
-			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid\n"
+			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid, relfilenode, oid\n"
							  "FROM pg_catalog.pg_class\n"
-							  "WHERE oid = %u;\n",
-							  LargeObjectRelationId);
+							  "WHERE oid IN (%u, %u);\n",
+							  LargeObjectRelationId, LargeObjectLOidPNIndexId);
 
-		lo_res = ExecuteSqlQueryForSingleRow(fout, loFrozenQry->data);
+		lo_res = ExecuteSqlQuery(fout, loFrozenQry->data, PGRES_TUPLES_OK);
 
 		i_relfrozenxid = PQfnumber(lo_res, "relfrozenxid");
 		i_relminmxid = PQfnumber(lo_res, "relminmxid");
+		i_relfilenode = PQfnumber(lo_res, "relfilenode");
+		i_oid = PQfnumber(lo_res, "oid");
+
+		appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve values for pg_largeobject and its index\n");
+		for (int i = 0; i < PQntuples(lo_res); ++i)
+			appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
+							  "SET relfrozenxid = '%u', relminmxid = '%u', relfilenode = '%u'\n"
+							  "WHERE oid = %u;\n",
+							  atooid(PQgetvalue(lo_res, i, i_relfrozenxid)),
+							  atooid(PQgetvalue(lo_res, i, i_relminmxid)),
+							  atooid(PQgetvalue(lo_res, i, i_relfilenode)),
+							  atooid(PQgetvalue(lo_res, i, i_oid)));
 
-		appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
-		appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
-						  "SET relfrozenxid = '%u', relminmxid = '%u'\n"
-						  "WHERE oid = %u;\n",
-						  atooid(PQgetvalue(lo_res, 0, i_relfrozenxid)),
-						  atooid(PQgetvalue(lo_res, 0, i_relminmxid)),
-						  LargeObjectRelationId);
 		ArchiveEntry(fout, nilCatalogId, createDumpId(),
					 ARCHIVE_OPTS(.tag = "pg_largeobject",
								  .description = "pg_largeobject",
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index b3ad8209eca..0dd649aa0f5 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -13,9 +13,13 @@
 
 #include "access/transam.h"
 #include "catalog/pg_class_d.h"
+#include "catalog/pg_largeobject_d.h"
 #include "pg_upgrade.h"
 
-static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
+static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace,
+								   bool *has_lotable, bool *has_loindex);
+static void remove_stray_largeobject_files(DbInfo *new_db, bool has_lotable,
+										   bool has_loindex);
 static void transfer_relfile(FileNameMap *map, const char *suffix,
							 bool vm_must_add_frozenbit);
 
@@ -98,6 +102,8 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
				   *new_db = NULL;
 		FileNameMap *mappings;
 		int			n_maps;
+		bool		has_lotable = false;
+		bool		has_loindex = false;
 
 		/*
 		 * Advance past any databases that exist in the new cluster but not in
@@ -117,22 +123,66 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 		mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
									new_pgdata);
 
-		if (n_maps)
-		{
-			transfer_single_new_db(mappings, n_maps, old_tablespace);
-		}
+		transfer_single_new_db(mappings, n_maps, old_tablespace,
+							   &has_lotable, &has_loindex);
+
+		/*
+		 * Remove the original pg_largeobject files created by initdb if they
+		 * exist in the new cluster with different relfilenodes. This avoids
+		 * leaving behind stray files unassociated with any relation.
+		 */
+		remove_stray_largeobject_files(new_db, has_lotable, has_loindex);
+
 		/* We allocate something even for n_maps == 0 */
 		pg_free(mappings);
	}
 }
 
+/*
+ * If there's no relation in the cluster whose relfilenode is the OID of the
+ * largeobject table/index, then remove the stray file. It ought to have a
+ * single segment and fork.
+ */
+static void
+remove_stray_largeobject_files(DbInfo *new_db, bool has_lotable,
+							   bool has_loindex)
+{
+	char		prefix[MAXPGPATH];
+	char		largeobj_file[MAXPGPATH];
+
+	if (new_db->db_tablespace[0] != '\0')
+		snprintf(prefix, sizeof(prefix), "%s/%s/%u",
+				 new_db->db_tablespace, new_cluster.tablespace_suffix,
+				 new_db->db_oid);
+	else
+		snprintf(prefix, sizeof(prefix), "%s/base/%u",
+				 new_cluster.pgdata, new_db->db_oid);
+
+	if (!has_lotable)
+	{
+		snprintf(largeobj_file, sizeof(largeobj_file), "%s/%u",
+				 prefix, LargeObjectRelationId);
+		if (unlink(largeobj_file) != 0)
+			pg_fatal("Unable to remove %s.\n", largeobj_file);
+	}
+
+	if (!has_loindex)
+	{
+		snprintf(largeobj_file, sizeof(largeobj_file), "%s/%u",
+				 prefix, LargeObjectLOidPNIndexId);
+		if (unlink(largeobj_file) != 0)
+			pg_fatal("Unable to remove %s.\n", largeobj_file);
+	}
+}
+
 /*
  * transfer_single_new_db()
  *
  * create links for mappings stored in "maps" array.
  */
 static void
-transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
+transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace,
+					   bool *has_lotable, bool *has_loindex)
 {
 	int			mapnum;
 	bool		vm_must_add_frozenbit = false;
@@ -146,6 +196,12 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 
	for (mapnum = 0; mapnum < size; mapnum++)
	{
+		/* Keep track of whether a filenode matches the OID */
+		if (maps[mapnum].relfilenumber == LargeObjectRelationId)
+			*has_lotable = true;
+		if (maps[mapnum].relfilenumber == LargeObjectLOidPNIndexId)
+			*has_loindex = true;
+
		if (old_tablespace == NULL ||
			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
		{
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 2f9b13bf0ae..9ed48c4e06a 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -208,6 +208,8 @@ if (defined($ENV{oldinstall}))
	}
 }
 
+$oldnode->safe_psql("regression", "VACUUM FULL pg_largeobject;");
+
 # In a VPATH build, we'll be started in the source directory, but we want
 # to run pg_upgrade in the build directory so that any files generated finish
 # in it, like delete_old_cluster.{sh,bat}.
-- 
2.17.1