On Wed, Jul 06, 2022 at 08:25:04AM -0400, Robert Haas wrote:
> On Wed, Jul 6, 2022 at 7:56 AM Justin Pryzby <pry...@telsasoft.com> wrote:
> > I'm looking into it, but it'd help to hear suggestions about where to put 
> > it.
> > My current ideas aren't very good.
> 
> In main() there is a comment that begins "Most failures happen in
> create_new_objects(), which has just completed at this point." I am
> thinking you might want to insert a new function call just before that
> comment, like remove_orphaned_files() or tidy_up_new_cluster().
> 
> Another option could be to do something at the beginning of
> transfer_all_new_tablespaces().

That seems like the better option, since it has access to the cluster's
filenodes.

I checked upgrades from 9.2, upgrades with and without VACUUM FULL, and
upgrades with a per-database tablespace.

Maybe it's a good idea to check that the file is empty before unlinking...
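
Something like this untested sketch, reusing the patch's largeobj_file and
pg_fatal (it'd also need sys/stat.h, plus errno.h/string.h for strerror):

	struct stat statbuf;

	/* Refuse to remove anything other than an empty file */
	if (stat(largeobj_file, &statbuf) != 0)
		pg_fatal("could not stat file \"%s\": %s\n",
				 largeobj_file, strerror(errno));
	if (statbuf.st_size != 0)
		pg_fatal("file \"%s\" is not empty\n", largeobj_file);
	if (unlink(largeobj_file) != 0)
		pg_fatal("Unable to remove %s.\n", largeobj_file);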

-- 
Justin
From dc4795c66916949493d51ff245c8e1509aaac05e Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 2 Jul 2022 00:48:47 -0500
Subject: [PATCH] fix "Preserve relfilenodes" for pg_largeobject and its index

initdb creates an empty pg_largeobject table, and the catalog's relfilenode
still pointed to that empty file, so pg_largeobject was empty after binary
upgrades.

Test like:
| ./tmp_install/usr/local/pgsql/bin/initdb --no-sync -D pg15b1.dat -k
| ./tmp_install/usr/local/pgsql/bin/postgres -D pg15b1.dat -c logging_collector=no -p 5678 -k /tmp&
| psql -h /tmp postgres -p 5678 -c '\lo_import /etc/shells' -c 'VACUUM FULL pg_largeobject'
| rm -fr pg15b2.dat && ./tmp_install/usr/local/pgsql/bin/initdb --no-sync -k -D pg15b2.dat && ./tmp_install/usr/local/pgsql/bin/pg_upgrade -d pg15b1.dat -D pg15b2.dat -b ./tmp_install/usr/local/pgsql/bin
| ./tmp_install/usr/local/pgsql/bin/postgres -D pg15b2.dat -c logging_collector=no -p 5678 -k /tmp&

See also:
9a974cbcba005256a19991203583a94b4f9a21a9
https://www.postgresql.org/message-id/20210601140949.GC22012%40momjian.us
https://www.postgresql.org/message-id/20220701231413.GI13040%40telsasoft.com
---
 src/bin/pg_dump/pg_dump.c              | 37 ++++++++------
 src/bin/pg_upgrade/relfilenumber.c     | 68 +++++++++++++++++++++++---
 src/bin/pg_upgrade/t/002_pg_upgrade.pl |  2 +
 3 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 227cdca2e2d..06d436f2f34 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3135,7 +3135,7 @@ dumpDatabase(Archive *fout)
 
 	/*
 	 * pg_largeobject comes from the old system intact, so set its
-	 * relfrozenxids and relminmxids.
+	 * relfrozenxids, relminmxids and relfilenodes.
 	 */
 	if (dopt->binary_upgrade)
 	{
@@ -3143,34 +3143,41 @@ dumpDatabase(Archive *fout)
 		PQExpBuffer loFrozenQry = createPQExpBuffer();
 		PQExpBuffer loOutQry = createPQExpBuffer();
 		int			i_relfrozenxid,
+					i_relfilenode,
+					i_oid,
 					i_relminmxid;
 
 		/*
 		 * pg_largeobject
 		 */
 		if (fout->remoteVersion >= 90300)
-			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid\n"
+			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid, relfilenode, oid\n"
 							  "FROM pg_catalog.pg_class\n"
-							  "WHERE oid = %u;\n",
-							  LargeObjectRelationId);
+							  "WHERE oid IN (%u, %u);\n",
+							  LargeObjectRelationId, LargeObjectLOidPNIndexId);
 		else
-			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid\n"
+			appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid, relfilenode, oid\n"
 							  "FROM pg_catalog.pg_class\n"
-							  "WHERE oid = %u;\n",
-							  LargeObjectRelationId);
+							  "WHERE oid IN (%u, %u);\n",
+							  LargeObjectRelationId, LargeObjectLOidPNIndexId);
 
-		lo_res = ExecuteSqlQueryForSingleRow(fout, loFrozenQry->data);
+		lo_res = ExecuteSqlQuery(fout, loFrozenQry->data, PGRES_TUPLES_OK);
 
 		i_relfrozenxid = PQfnumber(lo_res, "relfrozenxid");
 		i_relminmxid = PQfnumber(lo_res, "relminmxid");
+		i_relfilenode = PQfnumber(lo_res, "relfilenode");
+		i_oid = PQfnumber(lo_res, "oid");
+
+		appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve values for pg_largeobject and its index\n");
+		for (int i = 0; i < PQntuples(lo_res); ++i)
+			appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
+							  "SET relfrozenxid = '%u', relminmxid = '%u', relfilenode = '%u'\n"
+							  "WHERE oid = %u;\n",
+							  atooid(PQgetvalue(lo_res, i, i_relfrozenxid)),
+							  atooid(PQgetvalue(lo_res, i, i_relminmxid)),
+							  atooid(PQgetvalue(lo_res, i, i_relfilenode)),
+							  atooid(PQgetvalue(lo_res, i, i_oid)));
 
-		appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
-		appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
-						  "SET relfrozenxid = '%u', relminmxid = '%u'\n"
-						  "WHERE oid = %u;\n",
-						  atooid(PQgetvalue(lo_res, 0, i_relfrozenxid)),
-						  atooid(PQgetvalue(lo_res, 0, i_relminmxid)),
-						  LargeObjectRelationId);
 		ArchiveEntry(fout, nilCatalogId, createDumpId(),
 					 ARCHIVE_OPTS(.tag = "pg_largeobject",
 								  .description = "pg_largeobject",
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index b3ad8209eca..0dd649aa0f5 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -13,9 +13,13 @@
 
 #include "access/transam.h"
 #include "catalog/pg_class_d.h"
+#include "catalog/pg_largeobject_d.h"
 #include "pg_upgrade.h"
 
-static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
+static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace,
+	bool *has_lotable, bool *has_loindex);
+static void remove_stray_largeobject_files(DbInfo *new_db, bool has_lotable,
+		bool has_loindex);
 static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
 
 
@@ -98,6 +102,8 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 				   *new_db = NULL;
 		FileNameMap *mappings;
 		int			n_maps;
+		bool		has_lotable = false;
+		bool		has_loindex = false;
 
 		/*
 		 * Advance past any databases that exist in the new cluster but not in
@@ -117,22 +123,66 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 
 		mappings = gen_db_file_maps(old_db, new_db, &n_maps, old_pgdata,
 									new_pgdata);
-		if (n_maps)
-		{
-			transfer_single_new_db(mappings, n_maps, old_tablespace);
-		}
+		transfer_single_new_db(mappings, n_maps, old_tablespace,
+			&has_lotable, &has_loindex);
+
+		/*
+		 * Remove the original pg_largeobject files created by initdb if they
+		 * exist in the new cluster with different relfilenodes.  This avoids
+		 * leaving behind stray files unassociated with any relation.
+		 */
+		remove_stray_largeobject_files(new_db, has_lotable, has_loindex);
+
 		/* We allocate something even for n_maps == 0 */
 		pg_free(mappings);
 	}
 }
 
+/*
+ * If there's no relation in the cluster whose relfilenode is the OID of the
+ * largeobject table/index, then remove the stray file.  It ought to have a
+ * single segment and fork.
+ */
+static void
+remove_stray_largeobject_files(DbInfo *new_db, bool has_lotable,
+		bool has_loindex)
+{
+	char		prefix[MAXPGPATH];
+	char		largeobj_file[MAXPGPATH];
+
+	if (new_db->db_tablespace[0] != '\0')
+		snprintf(prefix, sizeof(prefix), "%s/%s/%u",
+				new_db->db_tablespace, new_cluster.tablespace_suffix,
+				new_db->db_oid);
+	else
+		snprintf(prefix, sizeof(prefix), "%s/base/%u",
+				new_cluster.pgdata, new_db->db_oid);
+
+	if (!has_lotable)
+	{
+		snprintf(largeobj_file, sizeof(largeobj_file), "%s/%u",
+				prefix, LargeObjectRelationId);
+		if (unlink(largeobj_file) != 0)
+			pg_fatal("Unable to remove %s.\n", largeobj_file);
+	}
+
+	if (!has_loindex)
+	{
+		snprintf(largeobj_file, sizeof(largeobj_file), "%s/%u",
+				prefix, LargeObjectLOidPNIndexId);
+		if (unlink(largeobj_file) != 0)
+			pg_fatal("Unable to remove %s.\n", largeobj_file);
+	}
+}
+
 /*
  * transfer_single_new_db()
  *
  * create links for mappings stored in "maps" array.
  */
 static void
-transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
+transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace,
+	bool *has_lotable, bool *has_loindex)
 {
 	int			mapnum;
 	bool		vm_must_add_frozenbit = false;
@@ -146,6 +196,12 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 
 	for (mapnum = 0; mapnum < size; mapnum++)
 	{
+		/* Keep track of whether a filenode matches the OID */
+		if (maps[mapnum].relfilenumber == LargeObjectRelationId)
+			*has_lotable = true;
+		if (maps[mapnum].relfilenumber == LargeObjectLOidPNIndexId)
+			*has_loindex = true;
+
 		if (old_tablespace == NULL ||
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 2f9b13bf0ae..9ed48c4e06a 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -208,6 +208,8 @@ if (defined($ENV{oldinstall}))
 	}
 }
 
+$oldnode->safe_psql("regression", "VACUUM FULL pg_largeobject;");
+
 # In a VPATH build, we'll be started in the source directory, but we want
 # to run pg_upgrade in the build directory so that any files generated finish
 # in it, like delete_old_cluster.{sh,bat}.
-- 
2.17.1
