On Tue, Nov 8, 2022 at 3:18 AM Nathan Bossart <nathandboss...@gmail.com> wrote: > > The call to snprintf() should take care of adding a terminating null byte > in the right place.
Ah, my bad. MemSet is avoided in v5 patch setting only the first byte. > > So, IIUC, your point here is what if the copy_file fails to create the > > temp file when it already exists. With the name collision being a rare > > scenario, given the pid and timestamp variables, I'm not sure if > > copy_file can ever fail because the temp file already exists (with > > errno EEXIST). However, if we want to be extra-cautious, checking if > > temp file exists with file_exists() before calling copy_file() might > > help avoid such cases. If we don't want to have extra system call (via > > file_exists()) to check the temp file existence, we can think of > > sending a flag to copy_file(src, dst, &is_dst_file_created) and use > > is_dst_file_created in the shutdown callback. Thoughts? > > Presently, if copy_file() encounters a pre-existing file, it should ERROR, > which will be caught in the sigsetjmp() block in basic_archive_file(). The > shutdown callback shouldn't run in this scenario. Determining the "file already exists" error/EEXIST case from a bunch of other errors in copy_file() is tricky. However, I quickly hacked up copy_file() by adding elevel parameter, please see the attached v5-0001. > I think this cleanup logic should run in both the shutdown callback and the > sigsetjmp() block, but it should only take action (i.e., deleting the > leftover temporary file) if the ERROR or shutdown occurs after creating the > file in copy_file() and before renaming the temporary file to its final > destination. Please see attached v5 patch set. If the direction seems okay, I'll add a CF entry. -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From cd805d6d26e66dcfc51ca6632a0147827a5b9544 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 9 Nov 2022 08:29:19 +0000 Subject: [PATCH v5] Refactor copy_file() to remove leftover temp files in basic_archive --- contrib/basic_archive/basic_archive.c | 23 ++++++++++- src/backend/storage/file/copydir.c | 56 ++++++++++++++++++++------- src/backend/storage/file/reinit.c | 2 +- src/include/storage/copydir.h | 2 +- 4 files changed, 66 insertions(+), 17 deletions(-) diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c index 9f221816bb..1b9f48eeed 100644 --- a/contrib/basic_archive/basic_archive.c +++ b/contrib/basic_archive/basic_archive.c @@ -275,14 +275,33 @@ basic_archive_file_internal(const char *file, const char *path) * Copy the file to its temporary destination. Note that this will fail * if temp already exists. */ - copy_file(unconstify(char *, path), temp); + if (copy_file(unconstify(char *, path), temp, LOG) != 0) + { + /* Remove the leftover temporary file. */ + if (errno != EEXIST) + unlink(temp); + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not copy file \"%s\" to temporary file \"%s\": %m", + path, temp))); + } /* * Sync the temporary file to disk and move it to its final destination. * Note that this will overwrite any existing file, but this is only * possible if someone else created the file since the stat() above. */ - (void) durable_rename(temp, destination, ERROR); + if (durable_rename(temp, destination, LOG) != 0) + { + /* Remove the leftover temporary file. */ + unlink(temp); + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rename temporary file \"%s\" to \"%s\": %m", + temp, destination))); + } ereport(DEBUG1, (errmsg("archived \"%s\" via basic_archive", file))); diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index 0cea4cbc89..ee20c83949 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -71,7 +71,7 @@ copydir(char *fromdir, char *todir, bool recurse) copydir(fromfile, tofile, true); } else if (xlde_type == PGFILETYPE_REG) - copy_file(fromfile, tofile); + copy_file(fromfile, tofile, ERROR); } FreeDir(xldir); @@ -113,8 +113,8 @@ copydir(char *fromdir, char *todir, bool recurse) /* * copy one file */ -void -copy_file(char *fromfile, char *tofile) +int +copy_file(char *fromfile, char *tofile, int elevel) { char *buffer; int srcfd; @@ -138,28 +138,35 @@ copy_file(char *fromfile, char *tofile) #define FLUSH_DISTANCE (1024 * 1024) #endif - /* Use palloc to ensure we get a maxaligned buffer */ - buffer = palloc(COPY_BUF_SIZE); - /* * Open the files */ srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY); if (srcfd < 0) - ereport(ERROR, + { + ereport(elevel, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", fromfile))); + return -1; + } dstfd = OpenTransientFile(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); if (dstfd < 0) - ereport(ERROR, + { + ereport(elevel, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tofile))); + return -1; + } /* * Do the data copying. */ flush_offset = 0; + + /* Use palloc to ensure we get a maxaligned buffer */ + buffer = palloc(COPY_BUF_SIZE); + for (offset = 0;; offset += nbytes) { /* If we got a cancel signal during the copy of the file, quit */ @@ -180,37 +187,60 @@ copy_file(char *fromfile, char *tofile) nbytes = read(srcfd, buffer, COPY_BUF_SIZE); pgstat_report_wait_end(); if (nbytes < 0) - ereport(ERROR, + { + ereport(elevel, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", fromfile))); + pfree(buffer); + CloseTransientFile(srcfd); + CloseTransientFile(dstfd); + return -1; + } if (nbytes == 0) break; errno = 0; pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_WRITE); if ((int) write(dstfd, buffer, nbytes) != nbytes) { + pgstat_report_wait_end(); + /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) errno = ENOSPC; - ereport(ERROR, + ereport(elevel, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", tofile))); + + pfree(buffer); + CloseTransientFile(srcfd); + CloseTransientFile(dstfd); + return -1; } pgstat_report_wait_end(); } + pfree(buffer); + if (offset > flush_offset) pg_flush_data(dstfd, flush_offset, offset - flush_offset); if (CloseTransientFile(dstfd) != 0) - ereport(ERROR, + { + ereport(elevel, (errcode_for_file_access(), errmsg("could not close file \"%s\": %m", tofile))); + CloseTransientFile(srcfd); + return -1; + } + if (CloseTransientFile(srcfd) != 0) - ereport(ERROR, + { + ereport(elevel, (errcode_for_file_access(), errmsg("could not close file \"%s\": %m", fromfile))); + return -1; + } - pfree(buffer); + return 0; } diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 647c458b52..0a12e57eb8 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -312,7 +312,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op) /* OK, we're ready to perform the actual copy. */ elog(DEBUG2, "copying %s to %s", srcpath, dstpath); - copy_file(srcpath, dstpath); + copy_file(srcpath, dstpath, ERROR); } FreeDir(dbspace_dir); diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h index 50a26edeb0..68fbbd12a3 100644 --- a/src/include/storage/copydir.h +++ b/src/include/storage/copydir.h @@ -14,6 +14,6 @@ #define COPYDIR_H extern void copydir(char *fromdir, char *todir, bool recurse); -extern void copy_file(char *fromfile, char *tofile); +extern int copy_file(char *fromfile, char *tofile, int elevel); #endif /* COPYDIR_H */ -- 2.34.1
From 22acbf446a4d57fbf1f78985c6c130dcf9871d39 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 9 Nov 2022 08:44:46 +0000 Subject: [PATCH v5] Add basic_archive shutdown callback to remove leftover temporary files --- contrib/basic_archive/basic_archive.c | 38 +++++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c index 1b9f48eeed..19b48818cb 100644 --- a/contrib/basic_archive/basic_archive.c +++ b/contrib/basic_archive/basic_archive.c @@ -42,10 +42,12 @@ PG_MODULE_MAGIC; static char *archive_directory = NULL; static MemoryContext basic_archive_context; +static char tempfilepath[MAXPGPATH + 256]; static bool basic_archive_configured(void); static bool basic_archive_file(const char *file, const char *path); static void basic_archive_file_internal(const char *file, const char *path); +static void basic_archive_shutdown(void); static bool check_archive_directory(char **newval, void **extra, GucSource source); static bool compare_files(const char *file1, const char *file2); @@ -85,6 +87,7 @@ _PG_archive_module_init(ArchiveModuleCallbacks *cb) cb->check_configured_cb = basic_archive_configured; cb->archive_file_cb = basic_archive_file; + cb->shutdown_cb = basic_archive_shutdown; } /* @@ -215,7 +218,6 @@ static void basic_archive_file_internal(const char *file, const char *path) { char destination[MAXPGPATH]; - char temp[MAXPGPATH + 256]; struct stat st; struct timeval tv; uint64 epoch; /* milliseconds */ @@ -268,23 +270,23 @@ basic_archive_file_internal(const char *file, const char *path) pg_add_u64_overflow(epoch, (uint64) (tv.tv_usec / 1000), &epoch)) elog(ERROR, "could not generate temporary file name for archiving"); - snprintf(temp, sizeof(temp), "%s/%s.%s.%d." UINT64_FORMAT, + snprintf(tempfilepath, sizeof(tempfilepath), "%s/%s.%s.%d." UINT64_FORMAT, archive_directory, "archtemp", file, MyProcPid, epoch); /* * Copy the file to its temporary destination. Note that this will fail * if temp already exists. */ - if (copy_file(unconstify(char *, path), temp, LOG) != 0) + if (copy_file(unconstify(char *, path), tempfilepath, LOG) != 0) { /* Remove the leftover temporary file. */ if (errno != EEXIST) - unlink(temp); + unlink(tempfilepath); ereport(ERROR, (errcode_for_file_access(), errmsg("could not copy file \"%s\" to temporary file \"%s\": %m", - path, temp))); + path, tempfilepath))); } /* @@ -292,17 +294,24 @@ basic_archive_file_internal(const char *file, const char *path) * Note that this will overwrite any existing file, but this is only * possible if someone else created the file since the stat() above. */ - if (durable_rename(temp, destination, LOG) != 0) + if (durable_rename(tempfilepath, destination, LOG) != 0) { /* Remove the leftover temporary file. */ - unlink(temp); + unlink(tempfilepath); ereport(ERROR, (errcode_for_file_access(), errmsg("could not rename temporary file \"%s\" to \"%s\": %m", - temp, destination))); + tempfilepath, destination))); } + /* + * Reset tempfilepath after renaming the temporary file to the final file + * so that the shutdown callback, if called after this point, will not + * attempt to remove it and fail. + */ + tempfilepath[0] = '\0'; + ereport(DEBUG1, (errmsg("archived \"%s\" via basic_archive", file))); } @@ -387,3 +396,16 @@ compare_files(const char *file1, const char *file2) return ret; } + +static void +basic_archive_shutdown(void) +{ + if (tempfilepath[0] == '\0') + return; + + /* Remove the leftover temporary file. */ + if (unlink(tempfilepath) < 0) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", tempfilepath))); +} -- 2.34.1