Hi, Rebased version of the patch is attached.
-- Regards, Nazir Bilal Yavuz Microsoft
From a419004a26410c9ad9348e2f1420695db8ca35b6 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Thu, 8 Aug 2024 15:01:48 +0300 Subject: [PATCH v9] Introduce file_copy_method GUC This GUC can be set to either COPY (default) or CLONE (if the system supports it). The CLONE method works like COPY, but attempts to use efficient file copying system calls. The kernel has the opportunity to share block ranges in copy-on-write file systems, or maybe push down the copy to network file systems and storage devices. Currently works on Linux, FreeBSD and macOS. More systems could be supported. Author: Thomas Munro <thomas.mu...@gmail.com> Author: Nazir Bilal Yavuz <byavu...@gmail.com> Reviewed-by: Robert Haas <robertmh...@gmail.com> Reviewed-by: Ranier Vilela <ranier...@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKGLM%2Bt%2BSwBU-cHeMUXJCOgBxSHLGZutV5zCwY4qrCcE02w%40mail.gmail.com --- src/include/storage/copydir.h | 9 ++ src/backend/storage/file/copydir.c | 86 ++++++++++++++++++- .../utils/activity/wait_event_names.txt | 1 + src/backend/utils/misc/guc_tables.c | 19 ++++ src/backend/utils/misc/postgresql.conf.sample | 4 + doc/src/sgml/config.sgml | 46 ++++++++++ doc/src/sgml/ref/alter_database.sgml | 3 +- doc/src/sgml/ref/create_database.sgml | 4 +- src/tools/pgindent/typedefs.list | 1 + 9 files changed, 170 insertions(+), 3 deletions(-) diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h index a25e258f479..6edc3ea4f69 100644 --- a/src/include/storage/copydir.h +++ b/src/include/storage/copydir.h @@ -13,6 +13,15 @@ #ifndef COPYDIR_H #define COPYDIR_H +typedef enum FileCopyMethod +{ + FILE_COPY_METHOD_COPY, + FILE_COPY_METHOD_CLONE, +} FileCopyMethod; + +/* GUC parameters */ +extern PGDLLIMPORT int file_copy_method; + extern void copydir(const char *fromdir, const char *todir, bool recurse); extern void copy_file(const char *fromfile, const char *tofile); diff --git a/src/backend/storage/file/copydir.c 
b/src/backend/storage/file/copydir.c index d4fbe542077..0a9c6fb62c0 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -21,17 +21,30 @@ #include <fcntl.h> #include <unistd.h> +#ifdef HAVE_COPYFILE_H +#include <copyfile.h> +#endif + #include "common/file_utils.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/copydir.h" #include "storage/fd.h" +/* GUCs */ +int file_copy_method = FILE_COPY_METHOD_COPY; + +static void clone_file(const char *fromfile, const char *tofile); + /* * copydir: copy a directory * * If recurse is false, subdirectories are ignored. Anything that's not * a directory or a regular file is ignored. + * + * This function uses a file_copy_method GUC to determine copy method. + * Uses of this function must be documented in the list of places + * affected by this GUC. */ void copydir(const char *fromdir, const char *todir, bool recurse) @@ -71,7 +84,12 @@ copydir(const char *fromdir, const char *todir, bool recurse) copydir(fromfile, tofile, true); } else if (xlde_type == PGFILETYPE_REG) - copy_file(fromfile, tofile); + { + if (file_copy_method == FILE_COPY_METHOD_CLONE) + clone_file(fromfile, tofile); + else + copy_file(fromfile, tofile); + } } FreeDir(xldir); @@ -214,3 +232,69 @@ copy_file(const char *fromfile, const char *tofile) pfree(buffer); } + +/* + * clone one file + */ +static void +clone_file(const char *fromfile, const char *tofile) +{ +#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) + if (copyfile(fromfile, tofile, NULL, COPYFILE_CLONE_FORCE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not clone file \"%s\" to \"%s\": %m", + fromfile, tofile))); +#elif defined(HAVE_COPY_FILE_RANGE) + int srcfd; + int dstfd; + ssize_t nbytes; + + srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY); + if (srcfd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", fromfile))); + + dstfd = OpenTransientFile(tofile, O_WRONLY 
| O_CREAT | O_EXCL | PG_BINARY); + if (dstfd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tofile))); + + do + { + /* If we got a cancel signal during the copy of the file, quit */ + CHECK_FOR_INTERRUPTS(); + + /* + * Don't copy too much at once, so we can check for interrupts from + * time to time if this falls back to a slow copy. + */ + pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_COPY); + nbytes = copy_file_range(srcfd, NULL, dstfd, NULL, 1024 * 1024, 0); + if (nbytes < 0 && errno != EINTR) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not clone file \"%s\" to \"%s\": %m", + fromfile, tofile))); + pgstat_report_wait_end(); + } + while (nbytes != 0); + + if (CloseTransientFile(dstfd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tofile))); + + if (CloseTransientFile(srcfd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", fromfile))); +#else + /* + * If there is no CLONE support, this function should not be called. + */ + pg_unreachable(); +#endif +} diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index d10ca723dc8..71649666047 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -202,6 +202,7 @@ CONTROL_FILE_SYNC "Waiting for the <filename>pg_control</filename> file to reach CONTROL_FILE_SYNC_UPDATE "Waiting for an update to the <filename>pg_control</filename> file to reach durable storage." CONTROL_FILE_WRITE "Waiting for a write to the <filename>pg_control</filename> file." CONTROL_FILE_WRITE_UPDATE "Waiting for a write to update the <filename>pg_control</filename> file." +COPY_FILE_COPY "Waiting for a file copy operation." COPY_FILE_READ "Waiting for a read during a file copy operation." COPY_FILE_WRITE "Waiting for a write during a file copy operation." 
DATA_FILE_EXTEND "Waiting for a relation data file to be extended." diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index c0a52cdcc3e..5255116b424 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -73,6 +73,7 @@ #include "replication/syncrep.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" +#include "storage/copydir.h" #include "storage/large_object.h" #include "storage/pg_shmem.h" #include "storage/predicate.h" @@ -474,6 +475,14 @@ static const struct config_enum_entry wal_compression_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry file_copy_method_options[] = { + {"copy", FILE_COPY_METHOD_COPY, false}, +#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) || defined(HAVE_COPY_FILE_RANGE) + {"clone", FILE_COPY_METHOD_CLONE, false}, +#endif + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -5049,6 +5058,16 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"file_copy_method", PGC_USERSET, RESOURCES_DISK, + gettext_noop("Selects the file copy method."), + NULL + }, + &file_copy_method, + FILE_COPY_METHOD_COPY, file_copy_method_options, + NULL, NULL, NULL + }, + { {"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS, gettext_noop("Selects the method used for forcing WAL updates to disk."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 9ec9f97e926..2b69ebc0344 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -178,6 +178,10 @@ #max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated # for NOTIFY / LISTEN queue +#file_copy_method = copy # the default is the first option + # copy + # clone (if your system supports it) + # - Kernel Resources - #max_files_per_process = 1000 # min 64 diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 
a1a1d58a436..bde161d5d68 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2286,6 +2286,52 @@ include_dir 'conf.d' </listitem> </varlistentry> + <varlistentry id="guc-file-copy-method" xreflabel="file_copy_method"> + <term><varname>file_copy_method</varname> (<type>enum</type>) + <indexterm> + <primary><varname>file_copy_method</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the copy method that will be used while copying files. + Possible values are <literal>COPY</literal> (default) and + <literal>CLONE</literal> (if your system supports it). + </para> + + <para> + This parameter controls the copy method used by: + </para> + <itemizedlist> + <listitem> + <para> + The <literal>FILE_COPY</literal> strategy in <command>CREATE DATABASE ... STRATEGY=FILE_COPY</command> + </para> + </listitem> + <listitem> + <para> + <command>ALTER DATABASE ... SET TABLESPACE ...</command> + </para> + </listitem> + </itemizedlist> + + <para> + The <literal>CLONE</literal> method works the same way as the + <literal>COPY</literal> method, except that it uses efficient file + cloning (also known as <quote>reflinks</quote> on + some systems) instead of copying files to the new data directory, + which can result in near-instantaneous copying of the data files. + </para> + + <para> + File cloning is only supported on some operating systems and file + systems. At present, it is supported on Linux (kernel 4.5 or + later) with Btrfs and XFS (on file systems created with reflink + support), and on macOS with APFS. 
+ </para> + </listitem> + </varlistentry> + <varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages"> <term><varname>max_notify_queue_pages</varname> (<type>integer</type>) <indexterm> diff --git a/doc/src/sgml/ref/alter_database.sgml b/doc/src/sgml/ref/alter_database.sgml index 2479c41e8d6..9d8ec677555 100644 --- a/doc/src/sgml/ref/alter_database.sgml +++ b/doc/src/sgml/ref/alter_database.sgml @@ -82,7 +82,8 @@ ALTER DATABASE <replaceable class="parameter">name</replaceable> RESET ALL default tablespace to the new tablespace. The new default tablespace must be empty for this database, and no one can be connected to the database. Tables and indexes in non-default tablespaces are - unaffected. + unaffected. The copy method used for the move can be changed with the + <xref linkend="guc-file-copy-method"/> option. </para> <para> diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 7653cb902ee..62f57eb4c32 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -138,7 +138,9 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable> log volume substantially, especially if the template database is large, it also forces the system to perform a checkpoint both before and after the creation of the new database. In some situations, this may - have a noticeable negative impact on overall system performance. + have a noticeable negative impact on overall system performance. The + copy method used by the <literal>FILE_COPY</literal> strategy can be changed + with the <xref linkend="guc-file-copy-method"/> option. 
</para> </listitem> </varlistentry> diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 547d14b3e7c..6cedc7af0c2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -772,6 +772,7 @@ FieldSelect FieldStore File FileBackupMethod +FileCopyMethod FileFdwExecutionState FileFdwPlanState FileNameMap -- 2.45.2