On Sat, Feb 12, 2022 at 07:37:30PM -0800, Andres Freund wrote: > On 2022-02-12 20:17:46 -0600, Justin Pryzby wrote: > > On Sat, Feb 12, 2022 at 06:00:44PM -0800, Andres Freund wrote: > > > I bet using COW file copies would speed up our own regression tests > > > noticeably > > > - on slower systems we spend a fair bit of time and space creating > > > template0 > > > and postgres, with the bulk of the data never changing. > > > > > > Template databases are also fairly commonly used by application > > > developers to > > > avoid the cost of rerunning all the setup DDL & initial data loading for > > > different tests. Making that measurably cheaper would be a significant > > > win. > > > > +1 > > > > I ran into this last week and was still thinking about proposing it. > > > > Would this help CI > > It could theoretically help linux - but currently I think the filesystem for > CI is ext4, which doesn't support FICLONE. I assume it'd help macos, but I > don't know the performance characteristics of copyfile(). I don't think any of > the other OSs have working reflink / file clone support. > > You could prototype it for CI on macos by using the "template initdb" patch > and passing -c to cp.
Yes, copyfile() in CREATE DATABASE seems to help cirrus/darwin a bit. https://cirrus-ci.com/task/5277139049119744 On xfs: postgres=# CREATE DATABASE new3 TEMPLATE postgres STRATEGY FILE_COPY ; 2022-07-31 00:21:28.350 CDT [2347] LOG: checkpoint starting: immediate force wait flush-all ... CREATE DATABASE Time: 1296.243 ms (00:01.296) postgres=# CREATE DATABASE new4 TEMPLATE postgres STRATEGY FILE_CLONE; 2022-07-31 00:21:38.697 CDT [2347] LOG: checkpoint starting: immediate force wait flush-all ... CREATE DATABASE Time: 167.152 ms -- Justin
>From 02a8ef404bcd14ddcf7fe0d795b68c0234f9c014 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sat, 30 Jul 2022 20:13:46 -0500 Subject: [PATCH] WIP: support file cloning in CREATE DATABASE Note that "CLONE" is wal-logged the same as "COPY". ci-os-only: macosx freebsd linux See also: 9c08aea6a3090a396be334cc58c511edab05776a ad43a413c4f7f5d024a5b2f51e00d280a22f1874 See also: 3a769d8239afdc003c91a56d2d8d5adfadacda5d, where it was added to pg_upgrade. --- .cirrus.yml | 1 + src/backend/commands/dbcommands.c | 21 ++++--- src/backend/storage/file/copydir.c | 12 +++- src/bin/pg_upgrade/file.c | 39 ++----------- src/bin/psql/tab-complete.c | 2 +- src/include/port.h | 3 + src/include/storage/copydir.h | 2 +- src/port/Makefile | 1 + src/port/fileclone.c | 89 ++++++++++++++++++++++++++++++ 9 files changed, 125 insertions(+), 45 deletions(-) create mode 100644 src/port/fileclone.c diff --git a/.cirrus.yml b/.cirrus.yml index 81eb8a9996d..ca8621c5f7e 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -232,6 +232,7 @@ task: CCACHE_DIR: ${HOME}/ccache HOMEBREW_CACHE: ${HOME}/homebrew-cache PERL5LIB: ${HOME}/perl5/lib/perl5 + PGCLONEFILE: 1 only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*(macos|darwin|osx).*' diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 9f990a8d68f..bcd7a92f967 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -79,11 +79,15 @@ * CREATEDB_FILE_COPY will simply perform a file system level copy of the * database and log a single record for each tablespace copied. To make this * safe, it also triggers checkpoints before and after the operation. + * + * CREATEDB_FILE_CLONE will make a copy-on-write clone of the file, if + * supported by the file system. */ typedef enum CreateDBStrategy { CREATEDB_WAL_LOG, - CREATEDB_FILE_COPY + CREATEDB_FILE_COPY, + CREATEDB_FILE_CLONE } CreateDBStrategy; typedef struct @@ -137,7 +141,7 @@ static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo); static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid, - Oid dst_tsid); + Oid dst_tsid, bool clone); static void recovery_create_dbdir(char *path, bool only_tblspc); /* @@ -556,7 +560,7 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo) */ static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, - Oid dst_tsid) + Oid dst_tsid, bool clone) { TableScanDesc scan; Relation rel; @@ -616,7 +620,7 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, * * We don't need to copy subdirectories */ - copydir(srcpath, dstpath, false); + copydir(srcpath, dstpath, false, clone); /* Record the filesystem change in XLOG */ { @@ -1005,6 +1009,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbstrategy = CREATEDB_WAL_LOG; else if (strcmp(strategy, "file_copy") == 0) dbstrategy = CREATEDB_FILE_COPY; + else if (strcmp(strategy, "file_clone") == 0) + dbstrategy = CREATEDB_FILE_CLONE; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -1388,7 +1394,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dst_deftablespace); else CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace, - dst_deftablespace); + dst_deftablespace, + dbstrategy == CREATEDB_FILE_CLONE); /* * Close pg_database, but keep lock till commit. @@ -2008,7 +2015,7 @@ movedb(const char *dbname, const char *tblspcname) /* * Copy files from the old tablespace to the new one */ - copydir(src_dbpath, dst_dbpath, false); + copydir(src_dbpath, dst_dbpath, false, false); /* * Record the filesystem change in XLOG @@ -3116,7 +3123,7 @@ dbase_redo(XLogReaderState *record) * * We don't need to copy subdirectories */ - copydir(src_path, dst_path, false); + copydir(src_path, dst_path, false, false); pfree(src_path); pfree(dst_path); diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index 658fd95ba95..12cab90a453 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -20,6 +20,7 @@ #include <fcntl.h> #include <unistd.h> +#include <stdlib.h> #include <sys/stat.h> #include "miscadmin.h" @@ -34,7 +35,7 @@ * a directory or a regular file is ignored. */ void -copydir(char *fromdir, char *todir, bool recurse) +copydir(char *fromdir, char *todir, bool recurse, bool clone) { DIR *xldir; struct dirent *xlde; @@ -71,7 +72,14 @@ copydir(char *fromdir, char *todir, bool recurse) { /* recurse to handle subdirectories */ if (recurse) - copydir(fromfile, tofile, true); + copydir(fromfile, tofile, true, clone); + } + else if (S_ISREG(fst.st_mode) && (clone || getenv("PGCLONEFILE"))) + { + if (clone_file(fromfile, tofile)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not clone file \"%s\": %m", fromfile))); } else if (S_ISREG(fst.st_mode)) copy_file(fromfile, tofile); diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 079fbda8389..73b0ab373ae 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -14,10 +14,6 @@ #ifdef HAVE_COPYFILE_H #include <copyfile.h> #endif -#ifdef __linux__ -#include <sys/ioctl.h> -#include <linux/fs.h> -#endif #include "access/visibilitymapdefs.h" #include "common/file_perm.h" @@ -38,36 +34,9 @@ void cloneFile(const char *src, const char *dst, const char *schemaName, const char *relName) { -#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) - if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0) - pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s", - schemaName, relName, src, dst, strerror(errno)); -#elif defined(__linux__) && defined(FICLONE) - int src_fd; - int dest_fd; - - if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0) - pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s", - schemaName, relName, src, strerror(errno)); - - if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, - pg_file_create_mode)) < 0) - pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s", - schemaName, relName, dst, strerror(errno)); - - if (ioctl(dest_fd, FICLONE, src_fd) < 0) - { - int save_errno = errno; - - unlink(dst); - - pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s", - schemaName, relName, src, dst, strerror(save_errno)); - } - - close(src_fd); - close(dest_fd); -#endif + if (clone_file(src, dst)) + pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m", + schemaName, relName, src, dst); } @@ -316,6 +285,8 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, close(src_fd); } + +// move this too ? void check_file_clone(void) { diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 1392fea4eda..a65c1a6f36e 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2817,7 +2817,7 @@ psql_completion(const char *text, int start, int end) else if (Matches("CREATE", "DATABASE", MatchAny, "TEMPLATE")) COMPLETE_WITH_QUERY(Query_for_list_of_template_databases); else if (Matches("CREATE", "DATABASE", MatchAny, "STRATEGY")) - COMPLETE_WITH("WAL_LOG", "FILE_COPY"); + COMPLETE_WITH("WAL_LOG", "FILE_COPY", "FILE_CLONE"); /* CREATE DOMAIN */ else if (Matches("CREATE", "DOMAIN", MatchAny)) diff --git a/src/include/port.h b/src/include/port.h index d39b04141f9..a99848ffea4 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -506,6 +506,9 @@ extern void *bsearch_arg(const void *key, const void *base, /* port/chklocale.c */ extern int pg_get_encoding_from_locale(const char *ctype, bool write_message); +/* port/clone.c */ +extern int clone_file(const char *src, const char *dst); + #if defined(WIN32) && !defined(FRONTEND) extern int pg_codepage_to_encoding(UINT cp); #endif diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h index 50a26edeb06..68e0691b387 100644 --- a/src/include/storage/copydir.h +++ b/src/include/storage/copydir.h @@ -13,7 +13,7 @@ #ifndef COPYDIR_H #define COPYDIR_H -extern void copydir(char *fromdir, char *todir, bool recurse); +extern void copydir(char *fromdir, char *todir, bool recurse, bool clone); extern void copy_file(char *fromfile, char *tofile); #endif /* COPYDIR_H */ diff --git a/src/port/Makefile b/src/port/Makefile index bfe1feb0d42..45418d679f6 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -42,6 +42,7 @@ OBJS = \ $(PG_CRC32C_OBJS) \ bsearch_arg.o \ chklocale.o \ + fileclone.o \ inet_net_ntop.o \ noblock.o \ path.o \ diff --git a/src/port/fileclone.c b/src/port/fileclone.c new file mode 100644 index 00000000000..6c85d2d7b9c --- /dev/null +++ b/src/port/fileclone.c @@ -0,0 +1,89 @@ +/* + * XXX + * + * Copyright (c) 2021-2022, PostgreSQL Global Development Group + * Copyright (c) 1990 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. [rescinded 22 July 1999] + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * XXX + */ + +#include "c.h" + +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#ifdef HAVE_COPYFILE_H +#include <copyfile.h> +#endif +#ifdef __linux__ +#include <sys/ioctl.h> +#include <linux/fs.h> +#endif + +#include "common/file_perm.h" + +/* + * cloneFile() + * + * Clones/reflinks a relation file from src to dst, returning nonzero on error. + */ +int +clone_file(const char *src, const char *dst) +{ +#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) + if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0) + return 1; + return 0; +#elif defined(__linux__) && defined(FICLONE) + int src_fd; + int dest_fd; + + if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0) + return 1; + + if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + pg_file_create_mode)) < 0) // XXX: this causes linking errors on cirrusci + return 1; + + if (ioctl(dest_fd, FICLONE, src_fd) < 0) + { + int save_errno = 0; + unlink(dst); + errno = save_errno; + return 1; + } + + close(src_fd); + close(dest_fd); + return 0; +#endif + errno = ENOTSUP; + return 1; +} -- 2.17.1