On Wed, Sep 21, 2022 at 8:03 AM Bharath Rupireddy <bharath.rupireddyforpostg...@gmail.com> wrote: > > On Fri, Aug 19, 2022 at 5:27 PM Bharath Rupireddy > <bharath.rupireddyforpostg...@gmail.com> wrote: > > > > Please review the attached v6 patch. > > I'm attaching the v7 patch rebased on to the latest HEAD.
v7 patch was failing on Windows [1]. This is because of the full path name being sent to dir_existsfile() instead of just sending the temp file name. I fixed the issue. Thanks Michael Paquier for an offlist chat. PSA v8 patch. [1] t/010_pg_basebackup.pl ... 134/? # Failed test 'pg_basebackup reports checksum mismatch stderr /(?^s:^WARNING.*checksum verification failed)/' # at t/010_pg_basebackup.pl line 769. # 'unrecognized win32 error code: 123WARNING: checksum verification failed in file "./base/5/16399", block 0: calculated 4C09 but expected B3F6 # pg_basebackup: error: checksum error occurred -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From 4bb81868810f55675a2abb97531d70ae8e0e3405 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Thu, 22 Sep 2022 01:07:18 +0000 Subject: [PATCH v8] Make WAL file initialization by pg_receivewal atomic While initializing WAL files with zeros (i.e. padding) the pg_receivewal can fail for any reason, for instance, no space left on device or host server crash etc., which may leave partially padded WAL files due to which the pg_receivewal won't come up after the hostserver restart. The error it emits is "error: write-ahead log file "foo.partial" has x bytes, should be 0 or y" (where y is size of WAL file typically and x < y). To fix this, one needs to manually intervene and delete the partially padded WAL file which requires an internal understanding of what the issue is and often a time-consuming process in production environments. The above problem can also exist for pg_basebackup as it uses the same walmethods.c function for initialization. To address the above problem, make WAL file initialization atomic i.e. first create a temporary file, pad it and then rename it to the target file. This is similar to what postgres does to make WAL file initialization atomic. Authors: Bharath Rupireddy, SATYANARAYANA NARLAPURAM Reviewed-by: Cary Huang, mahendrakar s Reviewed-by: Kyotaro Horiguchi, Michael Paquier Discussion: https://www.postgresql.org/message-id/CAHg%2BQDcVUss6ocOmbLbV5f4YeGLhOCt%2B1x2yLNfG2H_eDwU8tw%40mail.gmail.com --- src/bin/pg_basebackup/pg_receivewal.c | 22 ++++++ src/bin/pg_basebackup/walmethods.c | 98 ++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index f98ec557db..1cbc992e67 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -237,6 +237,28 @@ is_xlogfilename(const char *filename, bool *ispartial, return true; } + /* + * File looks like a temporary partial uncompressed WAL file that was + * leftover during pre-padding phase from a previous crash, delete it now + * as we don't need it. + */ + if (fname_len == XLOG_FNAME_LEN + strlen(".partial.tmp") && + strcmp(filename + XLOG_FNAME_LEN, ".partial.tmp") == 0) + { + /* 12 is length of string ".partial.tmp" */ + char path[MAXPGPATH + XLOG_FNAME_LEN + 12]; + + snprintf(path, sizeof(path), "%s/%s", basedir, filename); + + if (unlink(path) < 0) + pg_log_error("could not remove file \"%s\"", path); + + if (verbose) + pg_log_info("removed file \"%s\"", path); + + return false; + } + /* File does not look like something we know */ return false; } diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c index bc2e83d02b..ed7b3185b0 100644 --- a/src/bin/pg_basebackup/walmethods.c +++ b/src/bin/pg_basebackup/walmethods.c @@ -118,7 +118,11 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, const char *temp_suffix, size_t pad_to_size) { DirectoryMethodData *dir_data = (DirectoryMethodData *) wwmethod; - char tmppath[MAXPGPATH]; + char targetpath[MAXPGPATH]; + char tmpfilename[MAXPGPATH]; + char tmpsuffixpath[MAXPGPATH]; + bool shouldcreatetempfile = false; + int flags; char *filename; int fd; DirectoryMethodFile *f; @@ -134,8 +138,9 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, clear_error(wwmethod); filename = dir_get_file_name(wwmethod, pathname, temp_suffix); - snprintf(tmppath, sizeof(tmppath), "%s/%s", + snprintf(targetpath, sizeof(targetpath), "%s/%s", dir_data->basedir, filename); + snprintf(tmpfilename, MAXPGPATH, "%s.tmp", filename); pg_free(filename); /* @@ -143,12 +148,57 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, * the file descriptor is important for dir_sync() method as gzflush() * does not do any system calls to fsync() to make changes permanent on * disk. + * + * Create a temporary file for padding and no compression cases to ensure + * a fully initialized file is created. */ - fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode); + if (pad_to_size > 0 && + wwmethod->compression_algorithm == PG_COMPRESSION_NONE) + { + shouldcreatetempfile = true; + flags = O_WRONLY | PG_BINARY; + } + else + { + shouldcreatetempfile = false; + flags = O_WRONLY | O_CREAT | PG_BINARY; + } + + fd = open(targetpath, flags, pg_file_create_mode); if (fd < 0) { - wwmethod->lasterrno = errno; - return NULL; + if (errno == ENOENT && shouldcreatetempfile) + { + /* + * Actual file doesn't exist. Now, create a temporary file pad it + * and rename to the target file. The temporary file may exist from + * the last failed attempt (failed during partial padding or + * renaming or some other). If it exists, let's play safe and + * delete it before creating a new one. + */ + snprintf(tmpsuffixpath, MAXPGPATH, "%s.tmp", targetpath); + + if (dir_existsfile(wwmethod, tmpfilename)) + { + if (unlink(tmpsuffixpath) != 0) + { + wwmethod->lasterrno = errno; + return NULL; + } + } + + fd = open(tmpsuffixpath, flags | O_CREAT, pg_file_create_mode); + if (fd < 0) + { + wwmethod->lasterrno = errno; + return NULL; + } + } + else + { + wwmethod->lasterrno = errno; + return NULL; + } } #ifdef HAVE_LIBZ @@ -244,16 +294,46 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, } } + /* Rename the temporary file to target file */ + if (shouldcreatetempfile) + { + int rc; + + if (wwmethod->sync) + rc = durable_rename(tmpsuffixpath, targetpath); + else + rc = rename(tmpsuffixpath, targetpath); + + if (rc != 0) + { + wwmethod->lasterrno = errno; + close(fd); + + /* + * Removing the temporary file may as well fail here, but we are + * not concerned about it right now as we are already returning an + * error while renaming. However, the leftover temporary file gets + * deleted during the next padding cycle. + */ + unlink(tmpsuffixpath); + + return NULL; + } + } + /* * fsync WAL file and containing directory, to ensure the file is * persistently created and zeroed (if padded). That's particularly * important when using synchronous mode, where the file is modified and * fsynced in-place, without a directory fsync. + * + * For padding/temporary file case, durable_rename would have already + * fsynced file and containing directory. */ - if (wwmethod->sync) + if (!shouldcreatetempfile && wwmethod->sync) { - if (fsync_fname(tmppath, false) != 0 || - fsync_parent_path(tmppath) != 0) + if (fsync_fname(targetpath, false) != 0 || + fsync_parent_path(targetpath) != 0) { wwmethod->lasterrno = errno; #ifdef HAVE_LIBZ @@ -294,7 +374,7 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname, f->base.currpos = 0; f->base.pathname = pg_strdup(pathname); f->fd = fd; - f->fullpath = pg_strdup(tmppath); + f->fullpath = pg_strdup(targetpath); if (temp_suffix) f->temp_suffix = pg_strdup(temp_suffix); -- 2.34.1