On Fri, Mar 8, 2019 at 7:43 PM Amit Kapila <amit.kapil...@gmail.com> wrote:
> Few minor comments:
> 1.
> warning C4715: 'new_cluster_needs_fsm': not all control paths return a value
>
> Getting this new warning in the patch.

Hmm, I don't get that in a couple versions of gcc. Your compiler must
not know that pg_fatal() cannot return. I blindly added a fix.

> 2.
>
> This comment line doesn't seem to be related to this patch.  If so, I
> think we can avoid having any additional change which is not related
> to the functionality of this patch.  Feel free to submit it
> separately, if you think it is an improvement.

> +
> + /* Transfer any VM files if we can trust their contents. */
>   if (vm_crashsafe_match)

Well, I guess the current comment is still ok, so reverted. If I were
to do a separate cleanup patch, I would rather remove the
vm_must_add_frozenbit parameter -- there's no reason I can see for
calls that transfer the heap and FSM to know about this.

I also changed references to the 'first segment of the main fork'
where there will almost always only be one segment. This was a vestige
of the earlier algorithm I had.

> 3. Can we add a note about this in the Notes section of pg_upgrade
> documentation [1]?

Done.

> Have you done any performance testing of this patch?  I mean to say
> now that we added a new stat call for each table, we should see if
> that has any impact.  Ideally, that should be compensated by the fact
> that we are now not transferring *fsm files for small relations.  How
> about constructing a test where all relations are greater than 4 pages
> and then try to upgrade them.  We can check for a cluster with a
> different number of relations say 10K, 20K, 50K, 100K.

I have not, but I agree it should be done. I will try to do so soon.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From b65084daf15d198c9fdd986992b0147290c8e690 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Sun, 10 Mar 2019 22:01:21 +0800
Subject: [PATCH v23] During pg_upgrade, conditionally skip transfer of FSMs.

If a heap on the old cluster has 4 pages or fewer, and the old cluster
was PG v11 or earlier, don't copy or link the FSM. This will shrink
space usage for installations with large numbers of small tables.
---
 doc/src/sgml/ref/pgupgrade.sgml  |  7 ++++
 src/bin/pg_upgrade/info.c        | 16 +++++++--
 src/bin/pg_upgrade/pg_upgrade.h  |  6 ++++
 src/bin/pg_upgrade/relfilenode.c | 58 +++++++++++++++++++++++++++++++-
 4 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7e1afaf0a5..c896882dd1 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -792,6 +792,13 @@ psql --username=postgres --file=script.sql postgres
    is down.
   </para>
 
+  <para>
+   In <productname>PostgreSQL</productname> 12 and later small tables by
+   default don't have a free space map, as a space optimization.  If you are
+   upgrading a pre-12 cluster, the free space maps of small tables will
+   likewise not be transferred to the new cluster.  
+  </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 2f925f086c..902bfc647e 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
+	map->relpages = old_rel->relpages;
+	map->relkind = old_rel->relkind;
 
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
+	char	   *relkind = NULL;
 	int			i_spclocation,
 				i_nspname,
 				i_relname,
@@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 				i_indtable,
 				i_toastheap,
 				i_relfilenode,
-				i_reltablespace;
+				i_reltablespace,
+				i_relpages,
+				i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, %s "
+			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "      UNION ALL "
 			 "      SELECT * FROM toast_heap "
@@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relname = PQfnumber(res, "relname");
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
+	i_relpages = PQfnumber(res, "relpages");
+	i_relkind = PQfnumber(res, "relkind");
 	i_spclocation = PQfnumber(res, "spclocation");
 
 	for (relnum = 0; relnum < ntups; relnum++)
@@ -556,6 +563,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		curr->relname = pg_strdup(relname);
 
 		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
+		curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages));
+
+		relkind = PQgetvalue(res, relnum, i_relkind);
+		curr->relkind = relkind[0];
+
 		curr->tblsp_alloc = false;
 
 		/* Is the tablespace oid non-default? */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 2f67eee22b..baeb8ff0f8 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -147,6 +147,8 @@ typedef struct
 	char	   *tablespace;		/* tablespace path; "" for cluster default */
 	bool		nsp_alloc;		/* should nspname be freed? */
 	bool		tblsp_alloc;	/* should tablespace be freed? */
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
 } RelInfo;
 
 typedef struct
@@ -173,6 +175,10 @@ typedef struct
 	 */
 	Oid			old_relfilenode;
 	Oid			new_relfilenode;
+
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
+
 	/* the rest are used only for logging and error reporting */
 	char	   *nspname;		/* namespaces */
 	char	   *relname;
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index 0c78073f0e..dafa661751 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -14,10 +14,12 @@
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
+#include "storage/freespace.h"
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static bool new_cluster_needs_fsm(FileNameMap *map);
 
 
 /*
@@ -174,7 +176,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+				if (new_cluster_needs_fsm(&maps[mapnum]))
+					transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
 				if (vm_crashsafe_match)
 					transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
 			}
@@ -278,3 +281,56 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 			}
 	}
 }
+
+/*
+ * Return false for small heaps if we're upgrading across PG 12, the first
+ * version where small heap relations don't have FSMs by default.
+ */
+static bool
+new_cluster_needs_fsm(FileNameMap *map)
+{
+	char		old_primary_file[MAXPGPATH];
+	struct stat statbuf;
+
+	if (!(GET_MAJOR_VERSION(old_cluster.major_version) < 1200 &&
+		GET_MAJOR_VERSION(new_cluster.major_version) >= 1200))
+		return true;
+
+	/* Always transfer FSMs of non-heap relations. */
+	if (map->relkind != RELKIND_RELATION &&
+		map->relkind != RELKIND_TOASTVALUE)
+		return true;
+
+	/*
+	 * If pg_class.relpages falsely reports that the heap is above the
+	 * threshold, we will transfer a FSM when we don't need to, but this
+	 * is harmless.
+	 */
+	if (map->relpages > HEAP_FSM_CREATION_THRESHOLD)
+		return true;
+
+	/* Determine path of the primary file. */
+	snprintf(old_primary_file, sizeof(old_primary_file), "%s%s/%u/%u",
+			 map->old_tablespace,
+			 map->old_tablespace_suffix,
+			 map->old_db_oid,
+			 map->old_relfilenode);
+
+	/*
+	 * If pg_class.relpages falsely reports that the heap is below the
+	 * threshold, a FSM would be skipped when we actually need it.  To guard
+	 * against this, we verify the size of the primary file.
+	 */
+	if (stat(old_primary_file, &statbuf) != 0)
+	{
+		pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\"): %s\n",
+				 map->nspname, map->relname, old_primary_file, strerror(errno));
+
+		/* Keep compiler quiet. */
+		return false;
+	}
+	else if (statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ)
+		return true;
+	else
+		return false;
+}
-- 
2.17.1

Reply via email to