On Tue, Nov 8, 2022 at 3:18 AM Nathan Bossart <nathandboss...@gmail.com> wrote:
>
> The call to snprintf() should take care of adding a terminating null byte
> in the right place.

Ah, my bad. MemSet is avoided in v5 patch setting only the first byte.

> > So, IIUC, your point here is what if the copy_file fails to create the
> > temp file when it already exists. With the name collision being a rare
> > scenario, given the pid and timestamp variables, I'm not sure if
> > copy_file can ever fail because the temp file already exists (with
> > errno EEXIST). However, if we want to be extra-cautious, checking if
> > temp file exists with file_exists() before calling copy_file() might
> > help avoid such cases. If we don't want to have extra system call (via
> > file_exists()) to check the temp file existence, we can think of
> > sending a flag to copy_file(src, dst, &is_dst_file_created) and use
> > is_dst_file_created in the shutdown callback. Thoughts?
>
> Presently, if copy_file() encounters a pre-existing file, it should ERROR,
> which will be caught in the sigsetjmp() block in basic_archive_file().  The
> shutdown callback shouldn't run in this scenario.

Determining the "file already exists" error/EEXIST case from a bunch
of other errors in copy_file() is tricky. However, I quickly hacked up
copy_file() by adding elevel parameter, please see the attached
v5-0001.

> I think this cleanup logic should run in both the shutdown callback and the
> sigsetjmp() block, but it should only take action (i.e., deleting the
> leftover temporary file) if the ERROR or shutdown occurs after creating the
> file in copy_file() and before renaming the temporary file to its final
> destination.

Please see attached v5 patch set.

If the direction seems okay, I'll add a CF entry.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From cd805d6d26e66dcfc51ca6632a0147827a5b9544 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 9 Nov 2022 08:29:19 +0000
Subject: [PATCH v5] Refactor copy_file() to remove leftover temp files in
 basic_archive

---
 contrib/basic_archive/basic_archive.c | 23 ++++++++++-
 src/backend/storage/file/copydir.c    | 56 ++++++++++++++++++++-------
 src/backend/storage/file/reinit.c     |  2 +-
 src/include/storage/copydir.h         |  2 +-
 4 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index 9f221816bb..1b9f48eeed 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -275,14 +275,33 @@ basic_archive_file_internal(const char *file, const char *path)
 	 * Copy the file to its temporary destination.  Note that this will fail
 	 * if temp already exists.
 	 */
-	copy_file(unconstify(char *, path), temp);
+	if (copy_file(unconstify(char *, path), temp, LOG) != 0)
+	{
+		/* Remove the leftover temporary file. */
+		if (errno != EEXIST)
+			unlink(temp);
+
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not copy file \"%s\" to temporary file \"%s\": %m",
+						path, temp)));
+	}
 
 	/*
 	 * Sync the temporary file to disk and move it to its final destination.
 	 * Note that this will overwrite any existing file, but this is only
 	 * possible if someone else created the file since the stat() above.
 	 */
-	(void) durable_rename(temp, destination, ERROR);
+	if (durable_rename(temp, destination, LOG) != 0)
+	{
+		/* Remove the leftover temporary file. */
+		unlink(temp);
+
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not rename temporary file \"%s\" to \"%s\": %m",
+						temp, destination)));
+	}
 
 	ereport(DEBUG1,
 			(errmsg("archived \"%s\" via basic_archive", file)));
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index 0cea4cbc89..ee20c83949 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -71,7 +71,7 @@ copydir(char *fromdir, char *todir, bool recurse)
 				copydir(fromfile, tofile, true);
 		}
 		else if (xlde_type == PGFILETYPE_REG)
-			copy_file(fromfile, tofile);
+			copy_file(fromfile, tofile, ERROR);
 	}
 	FreeDir(xldir);
 
@@ -113,8 +113,8 @@ copydir(char *fromdir, char *todir, bool recurse)
 /*
  * copy one file
  */
-void
-copy_file(char *fromfile, char *tofile)
+int
+copy_file(char *fromfile, char *tofile, int elevel)
 {
 	char	   *buffer;
 	int			srcfd;
@@ -138,28 +138,35 @@ copy_file(char *fromfile, char *tofile)
 #define FLUSH_DISTANCE (1024 * 1024)
 #endif
 
-	/* Use palloc to ensure we get a maxaligned buffer */
-	buffer = palloc(COPY_BUF_SIZE);
-
 	/*
 	 * Open the files
 	 */
 	srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY);
 	if (srcfd < 0)
-		ereport(ERROR,
+	{
+		ereport(elevel,
 				(errcode_for_file_access(),
 				 errmsg("could not open file \"%s\": %m", fromfile)));
+		return -1;
+	}
 
 	dstfd = OpenTransientFile(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
 	if (dstfd < 0)
-		ereport(ERROR,
+	{
+		ereport(elevel,
 				(errcode_for_file_access(),
 				 errmsg("could not create file \"%s\": %m", tofile)));
+		return -1;
+	}
 
 	/*
 	 * Do the data copying.
 	 */
 	flush_offset = 0;
+
+	/* Use palloc to ensure we get a maxaligned buffer */
+	buffer = palloc(COPY_BUF_SIZE);
+
 	for (offset = 0;; offset += nbytes)
 	{
 		/* If we got a cancel signal during the copy of the file, quit */
@@ -180,37 +187,60 @@ copy_file(char *fromfile, char *tofile)
 		nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
 		pgstat_report_wait_end();
 		if (nbytes < 0)
-			ereport(ERROR,
+		{
+			ereport(elevel,
 					(errcode_for_file_access(),
 					 errmsg("could not read file \"%s\": %m", fromfile)));
+			pfree(buffer);
+			CloseTransientFile(srcfd);
+			CloseTransientFile(dstfd);
+			return -1;
+		}
 		if (nbytes == 0)
 			break;
 		errno = 0;
 		pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_WRITE);
 		if ((int) write(dstfd, buffer, nbytes) != nbytes)
 		{
+			pgstat_report_wait_end();
+
 			/* if write didn't set errno, assume problem is no disk space */
 			if (errno == 0)
 				errno = ENOSPC;
-			ereport(ERROR,
+			ereport(elevel,
 					(errcode_for_file_access(),
 					 errmsg("could not write to file \"%s\": %m", tofile)));
+
+			pfree(buffer);
+			CloseTransientFile(srcfd);
+			CloseTransientFile(dstfd);
+			return -1;
 		}
 		pgstat_report_wait_end();
 	}
 
+	pfree(buffer);
+
 	if (offset > flush_offset)
 		pg_flush_data(dstfd, flush_offset, offset - flush_offset);
 
 	if (CloseTransientFile(dstfd) != 0)
-		ereport(ERROR,
+	{
+		ereport(elevel,
 				(errcode_for_file_access(),
 				 errmsg("could not close file \"%s\": %m", tofile)));
 
+		CloseTransientFile(srcfd);
+		return -1;
+	}
+
 	if (CloseTransientFile(srcfd) != 0)
-		ereport(ERROR,
+	{
+		ereport(elevel,
 				(errcode_for_file_access(),
 				 errmsg("could not close file \"%s\": %m", fromfile)));
+		return -1;
+	}
 
-	pfree(buffer);
+	return 0;
 }
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index 647c458b52..0a12e57eb8 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -312,7 +312,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
 			/* OK, we're ready to perform the actual copy. */
 			elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
-			copy_file(srcpath, dstpath);
+			copy_file(srcpath, dstpath, ERROR);
 		}
 
 		FreeDir(dbspace_dir);
diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h
index 50a26edeb0..68fbbd12a3 100644
--- a/src/include/storage/copydir.h
+++ b/src/include/storage/copydir.h
@@ -14,6 +14,6 @@
 #define COPYDIR_H
 
 extern void copydir(char *fromdir, char *todir, bool recurse);
-extern void copy_file(char *fromfile, char *tofile);
+extern int copy_file(char *fromfile, char *tofile, int elevel);
 
 #endif							/* COPYDIR_H */
-- 
2.34.1

From 22acbf446a4d57fbf1f78985c6c130dcf9871d39 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 9 Nov 2022 08:44:46 +0000
Subject: [PATCH v5] Add basic_archive shutdown callback to remove leftover
 temporary files

---
 contrib/basic_archive/basic_archive.c | 38 +++++++++++++++++++++------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index 1b9f48eeed..19b48818cb 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -42,10 +42,12 @@ PG_MODULE_MAGIC;
 
 static char *archive_directory = NULL;
 static MemoryContext basic_archive_context;
+static char	tempfilepath[MAXPGPATH + 256];
 
 static bool basic_archive_configured(void);
 static bool basic_archive_file(const char *file, const char *path);
 static void basic_archive_file_internal(const char *file, const char *path);
+static void basic_archive_shutdown(void);
 static bool check_archive_directory(char **newval, void **extra, GucSource source);
 static bool compare_files(const char *file1, const char *file2);
 
@@ -85,6 +87,7 @@ _PG_archive_module_init(ArchiveModuleCallbacks *cb)
 
 	cb->check_configured_cb = basic_archive_configured;
 	cb->archive_file_cb = basic_archive_file;
+	cb->shutdown_cb = basic_archive_shutdown;
 }
 
 /*
@@ -215,7 +218,6 @@ static void
 basic_archive_file_internal(const char *file, const char *path)
 {
 	char		destination[MAXPGPATH];
-	char		temp[MAXPGPATH + 256];
 	struct stat st;
 	struct timeval tv;
 	uint64		epoch;			/* milliseconds */
@@ -268,23 +270,23 @@ basic_archive_file_internal(const char *file, const char *path)
 		pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch))
 		elog(ERROR, "could not generate temporary file name for archiving");
 
-	snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT,
+	snprintf(tempfilepath, sizeof(tempfilepath), "%s/%s.%s.%d." UINT64_FORMAT,
 			 archive_directory, "archtemp", file, MyProcPid, epoch);
 
 	/*
 	 * Copy the file to its temporary destination.  Note that this will fail
 	 * if temp already exists.
 	 */
-	if (copy_file(unconstify(char *, path), temp, LOG) != 0)
+	if (copy_file(unconstify(char *, path), tempfilepath, LOG) != 0)
 	{
 		/* Remove the leftover temporary file. */
 		if (errno != EEXIST)
-			unlink(temp);
+			unlink(tempfilepath);
 
 		ereport(ERROR,
 				(errcode_for_file_access(),
 				 errmsg("could not copy file \"%s\" to temporary file \"%s\": %m",
-						path, temp)));
+						path, tempfilepath)));
 	}
 
 	/*
@@ -292,17 +294,24 @@ basic_archive_file_internal(const char *file, const char *path)
 	 * Note that this will overwrite any existing file, but this is only
 	 * possible if someone else created the file since the stat() above.
 	 */
-	if (durable_rename(temp, destination, LOG) != 0)
+	if (durable_rename(tempfilepath, destination, LOG) != 0)
 	{
 		/* Remove the leftover temporary file. */
-		unlink(temp);
+		unlink(tempfilepath);
 
 		ereport(ERROR,
 				(errcode_for_file_access(),
 				 errmsg("could not rename temporary file \"%s\" to \"%s\": %m",
-						temp, destination)));
+						tempfilepath, destination)));
 	}
 
+	/*
+	 * Reset tempfilepath after renaming the temporary file to the final file
+	 * so that the shutdown callback, if called after this point, will not
+	 * attempt to remove it and fail.
+	 */
+	tempfilepath[0] = '\0';
+
 	ereport(DEBUG1,
 			(errmsg("archived \"%s\" via basic_archive", file)));
 }
@@ -387,3 +396,16 @@ compare_files(const char *file1, const char *file2)
 
 	return ret;
 }
+
+static void
+basic_archive_shutdown(void)
+{
+	if (tempfilepath[0] == '\0')
+		return;
+
+	/* Remove the leftover temporary file. */
+	if (unlink(tempfilepath) < 0)
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not unlink file \"%s\": %m", tempfilepath)));
+}
-- 
2.34.1

Reply via email to