On Fri, Mar 8, 2019 at 7:43 PM Amit Kapila <amit.kapil...@gmail.com> wrote: > Few minor comments: > 1. > warning C4715: 'new_cluster_needs_fsm': not all control paths return a value > > Getting this new warning in the patch.
Hmm, I don't get that in a couple versions of gcc. Your compiler must not know that pg_fatal() cannot return. I blindly added a fix. > 2. > > This comment line doesn't seem to be related to this patch. If so, I > think we can avoid having any additional change which is not related > to the functionality of this patch. Feel free to submit it > separately, if you think it is an improvement. > + > + /* Transfer any VM files if we can trust their contents. */ > if (vm_crashsafe_match) Well, I guess the current comment is still ok, so reverted. If I were to do a separate cleanup patch, I would rather remove the vm_must_add_frozenbit parameter -- there's no reason I can see for calls that transfer the heap and FSM to know about this. I also changed references to the 'first segment of the main fork' where there will almost always only be one segment. This was a vestige of the earlier algorithm I had. > 3. Can we add a note about this in the Notes section of pg_upgrade > documentation [1]? Done. > Have you done any performance testing of this patch? I mean to say > now that we added a new stat call for each table, we should see if > that has any impact. Ideally, that should be compensated by the fact > that we are now not transferring *fsm files for small relations. How > about constructing a test where all relations are greater than 4 pages > and then try to upgrade them. We can check for a cluster with a > different number of relations say 10K, 20K, 50K, 100K. I have not, but I agree it should be done. I will try to do so soon. -- John Naylor https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From b65084daf15d198c9fdd986992b0147290c8e690 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 10 Mar 2019 22:01:21 +0800 Subject: [PATCH v23] During pg_upgrade, conditionally skip transfer of FSMs. If a heap on the old cluster has 4 pages or fewer, and the old cluster was PG v11 or earlier, don't copy or link the FSM. This will shrink space usage for installations with large numbers of small tables. --- doc/src/sgml/ref/pgupgrade.sgml | 7 ++++ src/bin/pg_upgrade/info.c | 16 +++++++-- src/bin/pg_upgrade/pg_upgrade.h | 6 ++++ src/bin/pg_upgrade/relfilenode.c | 58 +++++++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 7e1afaf0a5..c896882dd1 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -792,6 +792,13 @@ psql --username=postgres --file=script.sql postgres is down. </para> + <para> + In <productname>PostgreSQL</productname> 12 and later small tables by + default don't have a free space map, as a space optimization. If you are + upgrading a pre-12 cluster, the free space maps of small tables will + likewise not be transferred to the new cluster. + </para> + </refsect1> <refsect1> diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 2f925f086c..902bfc647e 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data, map->old_db_oid = old_db->db_oid; map->new_db_oid = new_db->db_oid; + map->relpages = old_rel->relpages; + map->relkind = old_rel->relkind; /* * old_relfilenode might differ from pg_class.oid (and hence @@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) char *nspname = NULL; char *relname = NULL; char *tablespace = NULL; + char *relkind = NULL; int i_spclocation, i_nspname, i_relname, @@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) i_indtable, i_toastheap, i_relfilenode, - i_reltablespace; + i_reltablespace, + i_relpages, + i_relkind; char query[QUERY_ALLOC]; char *last_namespace = NULL, *last_tablespace = NULL; @@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) */ snprintf(query + strlen(query), sizeof(query) - strlen(query), "SELECT all_rels.*, n.nspname, c.relname, " - " c.relfilenode, c.reltablespace, %s " + " c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s " "FROM (SELECT * FROM regular_heap " " UNION ALL " " SELECT * FROM toast_heap " @@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) i_relname = PQfnumber(res, "relname"); i_relfilenode = PQfnumber(res, "relfilenode"); i_reltablespace = PQfnumber(res, "reltablespace"); + i_relpages = PQfnumber(res, "relpages"); + i_relkind = PQfnumber(res, "relkind"); i_spclocation = PQfnumber(res, "spclocation"); for (relnum = 0; relnum < ntups; relnum++) @@ -556,6 +563,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) curr->relname = pg_strdup(relname); curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode)); + curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages)); + + relkind = PQgetvalue(res, relnum, i_relkind); + curr->relkind = relkind[0]; + curr->tblsp_alloc = false; /* Is the tablespace oid non-default? */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 2f67eee22b..baeb8ff0f8 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -147,6 +147,8 @@ typedef struct char *tablespace; /* tablespace path; "" for cluster default */ bool nsp_alloc; /* should nspname be freed? */ bool tblsp_alloc; /* should tablespace be freed? */ + int32 relpages; /* # of pages -- see pg_class.h */ + char relkind; /* relation kind -- see pg_class.h */ } RelInfo; typedef struct @@ -173,6 +175,10 @@ typedef struct */ Oid old_relfilenode; Oid new_relfilenode; + + int32 relpages; /* # of pages -- see pg_class.h */ + char relkind; /* relation kind -- see pg_class.h */ + /* the rest are used only for logging and error reporting */ char *nspname; /* namespaces */ char *relname; diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index 0c78073f0e..dafa661751 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -14,10 +14,12 @@ #include <sys/stat.h> #include "catalog/pg_class_d.h" #include "access/transam.h" +#include "storage/freespace.h" static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit); +static bool new_cluster_needs_fsm(FileNameMap *map); /* @@ -174,7 +176,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) /* * Copy/link any fsm and vm files, if they exist */ - transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); + if (new_cluster_needs_fsm(&maps[mapnum])) + transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); if (vm_crashsafe_match) transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit); } @@ -278,3 +281,56 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro } } } + +/* + * Return false for small heaps if we're upgrading across PG 12, the first + * version where small heap relations don't have FSMs by default. + */ +static bool +new_cluster_needs_fsm(FileNameMap *map) +{ + char old_primary_file[MAXPGPATH]; + struct stat statbuf; + + if (!(GET_MAJOR_VERSION(old_cluster.major_version) < 1200 && + GET_MAJOR_VERSION(new_cluster.major_version) >= 1200)) + return true; + + /* Always transfer FSMs of non-heap relations. */ + if (map->relkind != RELKIND_RELATION && + map->relkind != RELKIND_TOASTVALUE) + return true; + + /* + * If pg_class.relpages falsely reports that the heap is above the + * threshold, we will transfer a FSM when we don't need to, but this + * is harmless. + */ + if (map->relpages > HEAP_FSM_CREATION_THRESHOLD) + return true; + + /* Determine path of the primary file. */ + snprintf(old_primary_file, sizeof(old_primary_file), "%s%s/%u/%u", + map->old_tablespace, + map->old_tablespace_suffix, + map->old_db_oid, + map->old_relfilenode); + + /* + * If pg_class.relpages falsely reports that the heap is below the + * threshold, a FSM would be skipped when we actually need it. To guard + * against this, we verify the size of the primary file. + */ + if (stat(old_primary_file, &statbuf) != 0) + { + pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\"): %s\n", + map->nspname, map->relname, old_primary_file, strerror(errno)); + + /* Keep compiler quiet. */ + return false; + } + else if (statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ) + return true; + else + return false; +} -- 2.17.1