I'm reposting this patch in a separate thread so I can make a separate commitfest entry for it. The previous discussion is mixed in with [0].

The purpose of this patch is to allow using pg_upgrade between clusters that have different checksum settings. When upgrading between instances with different checksum settings, the --copy (default) mode automatically sets (or unsets) the checksum on the fly.

This would be particularly useful if we switched to enabling checksums by default, as [0] proposes, but it's also useful without that.

Some discussion points:

- We have added a bunch of different transfer modes to pg_upgrade in order to give the user control over the expected file transfer performance. Here, I have added this checksum rewriting to the existing --copy mode, and I have measured about a 5% overhead. An alternative would be to make this an explicit mode (--copy-slow, --copy-and-make-adjustments).

- Windows has a separate code path in the --copy mode. I don't know the reasons or advantages of that. So it's not clear how the checksum rewriting mode should be handled in that case. We could switch to the non-Windows code path in that case, but then the performance difference between the normal path and the checksum-rewriting path is even more unclear.


[0]: https://www.postgresql.org/message-id/flat/cakanmmkwimhik5ahmbedf5vqzbobbcwepho4-pioweabzwc...@mail.gmail.com
From a050f2d857b2e5321b9e1110f6a41185d91c51ee Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 22 Aug 2024 07:56:04 +0200
Subject: [PATCH v1] pg_upgrade: Support for upgrading to checksums enabled

When upgrading between instances with different checksum settings, the
--copy (default) mode automatically sets (or unsets) the checksum on
the fly.

TODO: What to do about the Windows code path?
---
 src/bin/pg_upgrade/controldata.c   | 17 +++++-----------
 src/bin/pg_upgrade/file.c          | 31 +++++++++++++++++++++++++++++-
 src/bin/pg_upgrade/pg_upgrade.h    |  3 ++-
 src/bin/pg_upgrade/relfilenumber.c |  2 +-
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index 854c6887a23..583cbf109cf 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -693,18 +693,11 @@ check_control_data(ControlData *oldctrl,
         * check_for_isn_and_int8_passing_mismatch().
         */
 
-       /*
-        * We might eventually allow upgrades from checksum to no-checksum
-        * clusters.
-        */
-       if (oldctrl->data_checksum_version == 0 &&
-               newctrl->data_checksum_version != 0)
-               pg_fatal("old cluster does not use data checksums but the new 
one does");
-       else if (oldctrl->data_checksum_version != 0 &&
-                        newctrl->data_checksum_version == 0)
-               pg_fatal("old cluster uses data checksums but the new one does 
not");
-       else if (oldctrl->data_checksum_version != 
newctrl->data_checksum_version)
-               pg_fatal("old and new cluster pg_controldata checksum versions 
do not match");
+       if (oldctrl->data_checksum_version != newctrl->data_checksum_version)
+       {
+               if (user_opts.transfer_mode != TRANSFER_MODE_COPY)
+                       pg_fatal("when upgrading between clusters with 
different data checksum settings, transfer mode --copy must be used");
+       }
 }
 
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 73932504cae..0a16ad79237 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -80,12 +80,14 @@ cloneFile(const char *src, const char *dst,
  */
 void
 copyFile(const char *src, const char *dst,
-                const char *schemaName, const char *relName)
+                const char *schemaName, const char *relName,
+                int segno)
 {
 #ifndef WIN32
        int                     src_fd;
        int                     dest_fd;
        char       *buffer;
+       BlockNumber blocknum;
 
        if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
                pg_fatal("error while copying relation \"%s.%s\": could not 
open file \"%s\": %m",
@@ -101,6 +103,8 @@ copyFile(const char *src, const char *dst,
 
        buffer = (char *) pg_malloc(COPY_BUF_SIZE);
 
+       blocknum = segno * old_cluster.controldata.largesz;
+
        /* perform data copying i.e read src source, write to destination */
        while (true)
        {
@@ -113,6 +117,31 @@ copyFile(const char *src, const char *dst,
                if (nbytes == 0)
                        break;
 
+               /*
+                * Update checksums if upgrading between different settings
+                */
+               if (old_cluster.controldata.data_checksum_version != 
new_cluster.controldata.data_checksum_version)
+               {
+                       /* must deal in whole blocks */
+                       nbytes = nbytes / BLCKSZ * BLCKSZ;
+
+                       for (int i = 0; i * BLCKSZ < nbytes; i++)
+                       {
+                               char       *page = buffer + i * BLCKSZ;
+                               PageHeader      header = (PageHeader) page;
+
+                               if (PageIsNew(page))
+                                       continue;
+
+                               if 
(new_cluster.controldata.data_checksum_version == 1)
+                                       header->pd_checksum = 
pg_checksum_page(page, blocknum);
+                               else
+                                       header->pd_checksum = 0;
+
+                               blocknum++;
+                       }
+               }
+
                errno = 0;
                if (write(dest_fd, buffer, nbytes) != nbytes)
                {
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index cdb6e2b7597..c6a4ade53a7 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -404,7 +404,8 @@ bool                pid_lock_file_exists(const char 
*datadir);
 void           cloneFile(const char *src, const char *dst,
                                          const char *schemaName, const char 
*relName);
 void           copyFile(const char *src, const char *dst,
-                                        const char *schemaName, const char 
*relName);
+                                        const char *schemaName, const char 
*relName,
+                                        int segno);
 void           copyFileByRange(const char *src, const char *dst,
                                                        const char *schemaName, 
const char *relName);
 void           linkFile(const char *src, const char *dst,
diff --git a/src/bin/pg_upgrade/relfilenumber.c 
b/src/bin/pg_upgrade/relfilenumber.c
index 1d3054d78bd..2ae69d71159 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -250,7 +250,7 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, 
bool vm_must_add_fro
                                case TRANSFER_MODE_COPY:
                                        pg_log(PG_VERBOSE, "copying \"%s\" to 
\"%s\"",
                                                   old_file, new_file);
-                                       copyFile(old_file, new_file, 
map->nspname, map->relname);
+                                       copyFile(old_file, new_file, 
map->nspname, map->relname, segno);
                                        break;
                                case TRANSFER_MODE_COPY_FILE_RANGE:
                                        pg_log(PG_VERBOSE, "copying \"%s\" to 
\"%s\" with copy_file_range",

base-commit: 9bb842f95ef3384f0822c386a4c569780e613e4e
-- 
2.46.0

Reply via email to