On Sat, Feb 12, 2022 at 07:37:30PM -0800, Andres Freund wrote:
> On 2022-02-12 20:17:46 -0600, Justin Pryzby wrote:
> > On Sat, Feb 12, 2022 at 06:00:44PM -0800, Andres Freund wrote:
> > > I bet using COW file copies would speed up our own regression tests 
> > > noticeably
> > > - on slower systems we spend a fair bit of time and space creating 
> > > template0
> > > and postgres, with the bulk of the data never changing.
> > > 
> > > Template databases are also fairly commonly used by application 
> > > developers to
> > > avoid the cost of rerunning all the setup DDL & initial data loading for
> > > different tests. Making that measurably cheaper would be a significant 
> > > win.
> > 
> > +1
> > 
> > I ran into this last week and was still thinking about proposing it.
> > 
> > Would this help CI
> 
> It could theoretically help linux - but currently I think the filesystem for
> CI is ext4, which doesn't support FICLONE. I assume it'd help macos, but I
> don't know the performance characteristics of copyfile(). I don't think any of
> the other OSs have working reflink / file clone support.
> 
> You could prototype it for CI on macos by using the "template initdb" patch
> and passing -c to cp.

Yes, copyfile() in CREATE DATABASE seems to help cirrus/darwin a bit.
https://cirrus-ci.com/task/5277139049119744

On xfs:
postgres=# CREATE DATABASE new3 TEMPLATE postgres STRATEGY FILE_COPY ;
2022-07-31 00:21:28.350 CDT [2347] LOG:  checkpoint starting: immediate force 
wait flush-all
...
CREATE DATABASE
Time: 1296.243 ms (00:01.296)

postgres=# CREATE DATABASE new4 TEMPLATE postgres STRATEGY FILE_CLONE;
2022-07-31 00:21:38.697 CDT [2347] LOG:  checkpoint starting: immediate force 
wait flush-all
...
CREATE DATABASE
Time: 167.152 ms

-- 
Justin
>From 02a8ef404bcd14ddcf7fe0d795b68c0234f9c014 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 30 Jul 2022 20:13:46 -0500
Subject: [PATCH] WIP: support file cloning in CREATE DATABASE

Note that "CLONE" is wal-logged the same as "COPY".

ci-os-only: macosx freebsd linux

See also:
9c08aea6a3090a396be334cc58c511edab05776a
ad43a413c4f7f5d024a5b2f51e00d280a22f1874

See also: 3a769d8239afdc003c91a56d2d8d5adfadacda5d, where it was added to
pg_upgrade.
---
 .cirrus.yml                        |  1 +
 src/backend/commands/dbcommands.c  | 21 ++++---
 src/backend/storage/file/copydir.c | 12 +++-
 src/bin/pg_upgrade/file.c          | 39 ++-----------
 src/bin/psql/tab-complete.c        |  2 +-
 src/include/port.h                 |  3 +
 src/include/storage/copydir.h      |  2 +-
 src/port/Makefile                  |  1 +
 src/port/fileclone.c               | 89 ++++++++++++++++++++++++++++++
 9 files changed, 125 insertions(+), 45 deletions(-)
 create mode 100644 src/port/fileclone.c

diff --git a/.cirrus.yml b/.cirrus.yml
index 81eb8a9996d..ca8621c5f7e 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -232,6 +232,7 @@ task:
     CCACHE_DIR: ${HOME}/ccache
     HOMEBREW_CACHE: ${HOME}/homebrew-cache
     PERL5LIB: ${HOME}/perl5/lib/perl5
+    PGCLONEFILE: 1
 
   only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*(macos|darwin|osx).*'
 
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 9f990a8d68f..bcd7a92f967 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -79,11 +79,15 @@
  * CREATEDB_FILE_COPY will simply perform a file system level copy of the
  * database and log a single record for each tablespace copied. To make this
  * safe, it also triggers checkpoints before and after the operation.
+ *
+ * CREATEDB_FILE_CLONE will make a copy-on-write clone of the file, if
+ * supported by the file system.
  */
 typedef enum CreateDBStrategy
 {
 	CREATEDB_WAL_LOG,
-	CREATEDB_FILE_COPY
+	CREATEDB_FILE_COPY,
+	CREATEDB_FILE_CLONE
 } CreateDBStrategy;
 
 typedef struct
@@ -137,7 +141,7 @@ static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
 static void CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid,
 									bool isRedo);
 static void CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dboid, Oid src_tsid,
-										Oid dst_tsid);
+										Oid dst_tsid, bool clone);
 static void recovery_create_dbdir(char *path, bool only_tblspc);
 
 /*
@@ -556,7 +560,7 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
  */
 static void
 CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
-							Oid dst_tsid)
+							Oid dst_tsid, bool clone)
 {
 	TableScanDesc scan;
 	Relation	rel;
@@ -616,7 +620,7 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
 		 *
 		 * We don't need to copy subdirectories
 		 */
-		copydir(srcpath, dstpath, false);
+		copydir(srcpath, dstpath, false, clone);
 
 		/* Record the filesystem change in XLOG */
 		{
@@ -1005,6 +1009,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 			dbstrategy = CREATEDB_WAL_LOG;
 		else if (strcmp(strategy, "file_copy") == 0)
 			dbstrategy = CREATEDB_FILE_COPY;
+		else if (strcmp(strategy, "file_clone") == 0)
+			dbstrategy = CREATEDB_FILE_CLONE;
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1388,7 +1394,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 									  dst_deftablespace);
 		else
 			CreateDatabaseUsingFileCopy(src_dboid, dboid, src_deftablespace,
-										dst_deftablespace);
+										dst_deftablespace,
+										dbstrategy == CREATEDB_FILE_CLONE);
 
 		/*
 		 * Close pg_database, but keep lock till commit.
@@ -2008,7 +2015,7 @@ movedb(const char *dbname, const char *tblspcname)
 		/*
 		 * Copy files from the old tablespace to the new one
 		 */
-		copydir(src_dbpath, dst_dbpath, false);
+		copydir(src_dbpath, dst_dbpath, false, false);
 
 		/*
 		 * Record the filesystem change in XLOG
@@ -3116,7 +3123,7 @@ dbase_redo(XLogReaderState *record)
 		 *
 		 * We don't need to copy subdirectories
 		 */
-		copydir(src_path, dst_path, false);
+		copydir(src_path, dst_path, false, false);
 
 		pfree(src_path);
 		pfree(dst_path);
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index 658fd95ba95..12cab90a453 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -20,6 +20,7 @@
 
 #include <fcntl.h>
 #include <unistd.h>
+#include <stdlib.h>
 #include <sys/stat.h>
 
 #include "miscadmin.h"
@@ -34,7 +35,7 @@
  * a directory or a regular file is ignored.
  */
 void
-copydir(char *fromdir, char *todir, bool recurse)
+copydir(char *fromdir, char *todir, bool recurse, bool clone)
 {
 	DIR		   *xldir;
 	struct dirent *xlde;
@@ -71,7 +72,14 @@ copydir(char *fromdir, char *todir, bool recurse)
 		{
 			/* recurse to handle subdirectories */
 			if (recurse)
-				copydir(fromfile, tofile, true);
+				copydir(fromfile, tofile, true, clone);
+		}
+		else if (S_ISREG(fst.st_mode) && (clone || getenv("PGCLONEFILE")))
+		{
+			if (clone_file(fromfile, tofile))
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not clone file \"%s\": %m", fromfile)));
 		}
 		else if (S_ISREG(fst.st_mode))
 			copy_file(fromfile, tofile);
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 079fbda8389..73b0ab373ae 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -14,10 +14,6 @@
 #ifdef HAVE_COPYFILE_H
 #include <copyfile.h>
 #endif
-#ifdef __linux__
-#include <sys/ioctl.h>
-#include <linux/fs.h>
-#endif
 
 #include "access/visibilitymapdefs.h"
 #include "common/file_perm.h"
@@ -38,36 +34,9 @@ void
 cloneFile(const char *src, const char *dst,
 		  const char *schemaName, const char *relName)
 {
-#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
-	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
-		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
-				 schemaName, relName, src, dst, strerror(errno));
-#elif defined(__linux__) && defined(FICLONE)
-	int			src_fd;
-	int			dest_fd;
-
-	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
-		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s",
-				 schemaName, relName, src, strerror(errno));
-
-	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
-						pg_file_create_mode)) < 0)
-		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s",
-				 schemaName, relName, dst, strerror(errno));
-
-	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
-	{
-		int			save_errno = errno;
-
-		unlink(dst);
-
-		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
-				 schemaName, relName, src, dst, strerror(save_errno));
-	}
-
-	close(src_fd);
-	close(dest_fd);
-#endif
+	if (clone_file(src, dst))
+		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %m",
+				schemaName, relName, src, dst);
 }
 
 
@@ -316,6 +285,8 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
 	close(src_fd);
 }
 
+
+// move this too ?
 void
 check_file_clone(void)
 {
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 1392fea4eda..a65c1a6f36e 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2817,7 +2817,7 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("CREATE", "DATABASE", MatchAny, "TEMPLATE"))
 		COMPLETE_WITH_QUERY(Query_for_list_of_template_databases);
 	else if (Matches("CREATE", "DATABASE", MatchAny, "STRATEGY"))
-		COMPLETE_WITH("WAL_LOG", "FILE_COPY");
+		COMPLETE_WITH("WAL_LOG", "FILE_COPY", "FILE_CLONE");
 
 	/* CREATE DOMAIN */
 	else if (Matches("CREATE", "DOMAIN", MatchAny))
diff --git a/src/include/port.h b/src/include/port.h
index d39b04141f9..a99848ffea4 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -506,6 +506,9 @@ extern void *bsearch_arg(const void *key, const void *base,
 /* port/chklocale.c */
 extern int	pg_get_encoding_from_locale(const char *ctype, bool write_message);
 
+/* port/clone.c */
+extern int	clone_file(const char *src, const char *dst);
+
 #if defined(WIN32) && !defined(FRONTEND)
 extern int	pg_codepage_to_encoding(UINT cp);
 #endif
diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h
index 50a26edeb06..68e0691b387 100644
--- a/src/include/storage/copydir.h
+++ b/src/include/storage/copydir.h
@@ -13,7 +13,7 @@
 #ifndef COPYDIR_H
 #define COPYDIR_H
 
-extern void copydir(char *fromdir, char *todir, bool recurse);
+extern void copydir(char *fromdir, char *todir, bool recurse, bool clone);
 extern void copy_file(char *fromfile, char *tofile);
 
 #endif							/* COPYDIR_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index bfe1feb0d42..45418d679f6 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -42,6 +42,7 @@ OBJS = \
 	$(PG_CRC32C_OBJS) \
 	bsearch_arg.o \
 	chklocale.o \
+	fileclone.o \
 	inet_net_ntop.o \
 	noblock.o \
 	path.o \
diff --git a/src/port/fileclone.c b/src/port/fileclone.c
new file mode 100644
index 00000000000..6c85d2d7b9c
--- /dev/null
+++ b/src/port/fileclone.c
@@ -0,0 +1,89 @@
+/*
+ * XXX
+ *
+ * Copyright (c) 2021-2022, PostgreSQL Global Development Group
+ * Copyright (c) 1990 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. [rescinded 22 July 1999]
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * XXX
+ */
+
+#include "c.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifdef HAVE_COPYFILE_H
+#include <copyfile.h>
+#endif
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#endif
+
+#include "common/file_perm.h"
+
+/*
+ * cloneFile()
+ *
+ * Clones/reflinks a relation file from src to dst, returning nonzero on error.
+ */
+int
+clone_file(const char *src, const char *dst)
+{
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
+		return 1;
+	return 0;
+#elif defined(__linux__) && defined(FICLONE)
+	int			src_fd;
+	int			dest_fd;
+
+	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+		return 1;
+
+	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+						pg_file_create_mode)) < 0) // XXX: this causes linking errors on cirrusci
+		return 1;
+
+	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+	{
+		int save_errno = 0;
+		unlink(dst);
+		errno = save_errno;
+		return 1;
+	}
+
+	close(src_fd);
+	close(dest_fd);
+	return 0;
+#endif
+	errno = ENOTSUP;
+	return 1;
+}
-- 
2.17.1

Reply via email to