Michaƫl-san,
The issue here is that trying to embed directly the fsync routines from file_utils.c into pg_resetwal.c messes up the inclusions because pg_resetwal.c includes backend-side includes, which themselves touch fd.h :( In short your approach avoids some extra mess with the include dependencies. .
I could remove the two "catalog/" includes from pg_resetwal, I assume that you meant these ones.
Maybe the two changes could be committed separately.I was thinking about this one, and for pg_rewind we don't care about the fsync of the control file because the full data folder gets fsync'd afterwards and in the event of a crash in the middle of a rewind the target data folder is surely not something to use, but we do for pg_checksums, and we do for pg_resetwal. Even if there is the argument that usually callers of update_controlfile() would care a lot about the control file and fsync it, I think that we need some control on if we do the fsync or not because many tools have a --no-sync and that should be fully respected.
So while your patch is on a good track, I would suggest to do the following things to complete it:
- Add an extra argument bits16 to update_controlfile to pass a set of optional flags, with NOSYNC being the only and current value. The default is to flush the file.
Hmmm. I just did that, but what about just a boolean? What other options could be required? Maybe some locking/checking?
- Move the wait event calls WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE and WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE to controldata_utils.c.
Done.
- And then delete UpdateControlFile() in xlog.c, and use update_controlfile() instead to remove even more code.
I was keeping that one for another patch because it touches the backend code, but it makes sense to do that in one go for consistency.
I kept the initial no-parameter function which calls the new one with 4 parameters, though, because it looks more homogeneous this way in the backend code. This is debatable.
The version in xlog.c uses BasicOpenFile(), so we should use also that in update_controlfile() instead of OpenTransientFile(). As any errors result in a PANIC we don't care about leaking fds.
Done. Attached is an update. -- Fabien.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 54d3c558c6..7f782e255c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -40,6 +40,7 @@ #include "catalog/pg_control.h" #include "catalog/pg_database.h" #include "commands/tablespace.h" +#include "common/controldata_utils.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" @@ -4757,48 +4758,7 @@ ReadControlFile(void) void UpdateControlFile(void) { - int fd; - - INIT_CRC32C(ControlFile->crc); - COMP_CRC32C(ControlFile->crc, - (char *) ControlFile, - offsetof(ControlFileData, crc)); - FIN_CRC32C(ControlFile->crc); - - fd = BasicOpenFile(XLOG_CONTROL_FILE, - O_RDWR | PG_BINARY); - if (fd < 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", XLOG_CONTROL_FILE))); - - errno = 0; - pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); - if (write(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", - XLOG_CONTROL_FILE))); - } - pgstat_report_wait_end(); - - pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE); - if (pg_fsync(fd) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - XLOG_CONTROL_FILE))); - pgstat_report_wait_end(); - - if (close(fd)) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", - XLOG_CONTROL_FILE))); + update_controlfile(".", XLOG_CONTROL_FILE, ControlFile, CF_OPT_NONE); } /* diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 2af8713216..334903711e 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -49,8 +49,7 @@ #include "access/multixact.h" #include "access/xlog.h" #include "access/xlog_internal.h" -#include "catalog/catversion.h" -#include "catalog/pg_control.h" +#include "common/controldata_utils.h" #include "common/fe_memutils.h" #include "common/file_perm.h" #include "common/restricted_token.h" @@ -918,18 +917,6 @@ PrintNewControlValues(void) static void RewriteControlFile(void) { - int fd; - char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */ - - /* - * For good luck, apply the same static assertions as in backend's - * WriteControlFile(). - */ - StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE, - "pg_control is too large for atomic disk writes"); - StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE, - "sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE"); - /* * Adjust fields as needed to force an empty XLOG starting at * newXlogSegNo. @@ -961,53 +948,7 @@ RewriteControlFile(void) ControlFile.max_prepared_xacts = 0; ControlFile.max_locks_per_xact = 64; - /* Contents are protected with a CRC */ - INIT_CRC32C(ControlFile.crc); - COMP_CRC32C(ControlFile.crc, - (char *) &ControlFile, - offsetof(ControlFileData, crc)); - FIN_CRC32C(ControlFile.crc); - - /* - * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding - * the excess over sizeof(ControlFileData). This reduces the odds of - * premature-EOF errors when reading pg_control. We'll still fail when we - * check the contents of the file, but hopefully with a more specific - * error than "couldn't read pg_control". - */ - memset(buffer, 0, PG_CONTROL_FILE_SIZE); - memcpy(buffer, &ControlFile, sizeof(ControlFileData)); - - unlink(XLOG_CONTROL_FILE); - - fd = open(XLOG_CONTROL_FILE, - O_RDWR | O_CREAT | O_EXCL | PG_BINARY, - pg_file_create_mode); - if (fd < 0) - { - fprintf(stderr, _("%s: could not create pg_control file: %s\n"), - progname, strerror(errno)); - exit(1); - } - - errno = 0; - if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - fprintf(stderr, _("%s: could not write pg_control file: %s\n"), - progname, strerror(errno)); - exit(1); - } - - if (fsync(fd) != 0) - { - fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno)); - exit(1); - } - - close(fd); + update_controlfile(XLOG_CONTROL_FILE, progname, &ControlFile, CF_OPT_NONE); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 7f1d6bf48a..d1908912ff 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -377,7 +377,7 @@ main(int argc, char **argv) ControlFile_new.minRecoveryPoint = endrec; ControlFile_new.minRecoveryPointTLI = endtli; ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY; - update_controlfile(datadir_target, progname, &ControlFile_new); + update_controlfile(datadir_target, progname, &ControlFile_new, CF_OPT_NONE); pg_log(PG_PROGRESS, "syncing target data directory\n"); syncTargetDirectory(); diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 71e67a2eda..65d9acc88e 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -31,6 +31,7 @@ #include "port/pg_crc32c.h" #ifndef FRONTEND #include "storage/fd.h" +#include "pgstat.h" #endif /* @@ -144,13 +145,13 @@ get_controlfile(const char *DataDir, const char *progname, bool *crc_ok_p) * update_controlfile() * * Update controlfile values with the contents given by caller. The - * contents to write are included in "ControlFile". Note that it is up - * to the caller to fsync the updated file, and to properly lock - * ControlFileLock when calling this routine in the backend. + * contents to write are included in "ControlFile". Not that it is + * to the caller to properly lock ControlFileLock when calling this + * routine in the backend. */ void update_controlfile(const char *DataDir, const char *progname, - ControlFileData *ControlFile) + ControlFileData *ControlFile, bits16 options) { int fd; char buffer[PG_CONTROL_FILE_SIZE]; @@ -182,7 +183,7 @@ update_controlfile(const char *DataDir, const char *progname, snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); #ifndef FRONTEND - if ((fd = OpenTransientFile(ControlFilePath, O_WRONLY | PG_BINARY)) == -1) + if ((fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY)) < 0) ereport(PANIC, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", @@ -198,6 +199,9 @@ update_controlfile(const char *DataDir, const char *progname, #endif errno = 0; +#ifndef FRONTEND + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); +#endif if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) { /* if write didn't set errno, assume problem is no disk space */ @@ -215,19 +219,34 @@ update_controlfile(const char *DataDir, const char *progname, exit(EXIT_FAILURE); #endif } +#ifndef FRONTEND + pgstat_report_wait_end(); +#endif + if (options & CF_OPT_NOSYNC == 0) + { #ifndef FRONTEND - if (CloseTransientFile(fd)) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", - ControlFilePath))); + pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE); + if (pg_fsync(fd) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + ControlFilePath))); + pgstat_report_wait_end(); #else + if (fsync(fd) != 0) + { + fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), + progname, ControlFilePath, strerror(errno)); + exit(EXIT_FAILURE); + } +#endif + } + if (close(fd) < 0) { fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), progname, ControlFilePath, strerror(errno)); exit(EXIT_FAILURE); } -#endif } diff --git a/src/include/common/controldata_utils.h b/src/include/common/controldata_utils.h index 95317ebacf..054454b319 100644 --- a/src/include/common/controldata_utils.h +++ b/src/include/common/controldata_utils.h @@ -12,10 +12,15 @@ #include "catalog/pg_control.h" +enum controlfile_update_bitmask_options { + CF_OPT_NONE = 0x00, + CF_OPT_NOSYNC = 0x01 +}; + extern ControlFileData *get_controlfile(const char *DataDir, const char *progname, bool *crc_ok_p); extern void update_controlfile(const char *DataDir, const char *progname, - ControlFileData *ControlFile); + ControlFileData *ControlFile, bits16 options); #endif /* COMMON_CONTROLDATA_UTILS_H */