Forking this thread in which Thomas implemented syncfs for the startup process (61752afb2). https://www.postgresql.org/message-id/flat/CA%2BhUKG%2BSG9jSW3ekwib0cSdC0yD-jReJ21X4bZAmqxoWTLTc2A%40mail.gmail.com
Is there any reason that initdb/pg_basebackup/pg_checksums/pg_rewind shouldn't use syncfs() ? do_syncfs() is in src/backend/ so would need to be duplicated^Wimplemented in common. They can't use the GUC, so need to add an cmdline option or look at an environment variable.
>From 747ec6011d438a655ee26105a315119b75036c10 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Wed, 29 Sep 2021 14:22:32 -0500 Subject: [PATCH] Allow use of syncfs() in frontend tools Like initdb/basebackup/checksums/rewind. See also: 61752afb26404dfc99a535c7a53f7f04dc110263 --- src/backend/utils/misc/guc.c | 1 + src/bin/initdb/initdb.c | 10 +++- src/bin/pg_basebackup/pg_basebackup.c | 2 +- src/bin/pg_checksums/pg_checksums.c | 2 +- src/bin/pg_rewind/file_ops.c | 2 +- src/common/file_utils.c | 72 ++++++++++++++++++++++++++- src/include/common/file_utils.h | 2 +- 7 files changed, 83 insertions(+), 8 deletions(-) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index d2ce4a8450..e37f6941f4 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -50,6 +50,7 @@ #include "commands/user.h" #include "commands/vacuum.h" #include "commands/variable.h" +#include "common/file_utils.h" #include "common/string.h" #include "funcapi.h" #include "jit/jit.h" diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 1ed4808d53..79048f3949 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -142,6 +142,7 @@ static bool noclean = false; static bool noinstructions = false; static bool do_sync = true; static bool sync_only = false; +static bool use_syncfs = false; static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; @@ -2200,6 +2201,7 @@ usage(const char *progname) printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal settings\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); + printf(_(" , --syncfs use syncfs() rather than recursive fsync()\n")); printf(_("\nOther options:\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -2869,6 +2871,7 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 'N'}, {"no-instructions", no_argument, NULL, 13}, {"sync-only", no_argument, NULL, 'S'}, + {"syncfs", no_argument, NULL, 15}, {"waldir", required_argument, NULL, 'X'}, {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, @@ -3020,6 +3023,9 @@ main(int argc, char *argv[]) extra_options, "-c debug_discard_caches=1"); break; + case 15: + use_syncfs = true; + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), @@ -3064,7 +3070,7 @@ main(int argc, char *argv[]) fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, use_syncfs); check_ok(); return 0; } @@ -3153,7 +3159,7 @@ main(int argc, char *argv[]) { fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, use_syncfs); check_ok(); } else diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 669aa207a3..0b59b1fe32 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -2192,7 +2192,7 @@ BaseBackup(void) } else { - (void) fsync_pgdata(basedir, serverVersion); + (void) fsync_pgdata(basedir, serverVersion, false); } } diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index e833c5d75e..021a4b18f9 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -687,7 +687,7 @@ main(int argc, char *argv[]) if (do_sync) { pg_log_info("syncing data directory"); - fsync_pgdata(DataDir, PG_VERSION_NUM); + fsync_pgdata(DataDir, PG_VERSION_NUM, false); } pg_log_info("updating control file"); diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index c50f283ede..c50d5945ab 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -296,7 +296,7 @@ sync_target_dir(void) if (!do_sync || dry_run) return; - fsync_pgdata(datadir_target, PG_VERSION_NUM); + fsync_pgdata(datadir_target, PG_VERSION_NUM, false); } diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 40b73bbe1a..c8b62e8aed 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -50,8 +50,33 @@ static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), bool process_symlinks); +/* This is a frontend version of what's in src/backend/storage/file/fd.c */ +#ifdef HAVE_SYNCFS +static void +do_syncfs(const char *path) +{ + int fd; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) + { + pg_log_error("could not open directory \"%s\": %m", path); + return; + } + if (syncfs(fd) < 0) + { + pg_log_fatal("could not synchronize file system for file \"%s\": %m", path); + exit(EXIT_FAILURE); + } + close(fd); +} +#endif + /* - * Issue fsync recursively on PGDATA and all its contents. + * Issue fsync recursively on PGDATA and all its contents, or issue syncfs for + * all potential filesystem, depending on recovery_init_sync_method setting. + * + * This is a frontend version of SyncDataDirectory. * * We fsync regular files and directories wherever they are, but we follow * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc. @@ -62,7 +87,8 @@ static void walkdir(const char *path, */ void fsync_pgdata(const char *pg_data, - int serverVersion) + int serverVersion, + bool use_syncfs) { bool xlog_is_symlink; char pg_wal[MAXPGPATH]; @@ -93,6 +119,48 @@ fsync_pgdata(const char *pg_data, xlog_is_symlink = true; #endif +#ifdef HAVE_SYNCFS + if (use_syncfs) + { + DIR *dir; + struct dirent *de; + + /* + * On Linux, we don't have to open every single file one by one. We + * can use syncfs() to sync whole filesystems. We only expect + * filesystem boundaries to exist where we tolerate symlinks, namely + * pg_wal and the tablespaces, so we call syncfs() for each of those + * directories. + */ + + /* Sync the top level pgdata directory. */ + do_syncfs(pg_data); + /* If any tablespaces are configured, sync each of those. */ + dir = opendir(pg_tblspc); + if (dir == NULL) + { + /* It's not a fatal error if pg_tblspc itself doesn't exist */ + pg_log_error("could not open directory \"%s\": %m", pg_tblspc); + } else { + while ((de = readdir(dir))) + { + char path[MAXPGPATH]; + + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) + continue; + + snprintf(path, MAXPGPATH, "%s/%s", pg_tblspc, de->d_name); + do_syncfs(path); + } + closedir(dir); + } + /* If pg_wal is a symlink, process that too. */ + if (xlog_is_symlink) + do_syncfs(pg_wal); + return; + } +#endif /* !HAVE_SYNCFS */ + /* * If possible, hint to the kernel that we're soon going to fsync the data * directory and its contents. diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index 978a57460a..e622829c0b 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -26,7 +26,7 @@ typedef enum PGFileType #ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); -extern void fsync_pgdata(const char *pg_data, int serverVersion); +extern void fsync_pgdata(const char *pg_data, int serverVersion, bool use_syncfs); extern void fsync_dir_recurse(const char *dir); extern int durable_rename(const char *oldfile, const char *newfile); extern int fsync_parent_path(const char *fname); -- 2.17.0