On Fri, Apr 8, 2022 at 7:56 PM Dave Page <dp...@pgadmin.org> wrote: > Windows 8 was a pretty unpopular release, so I would expect shifting to > 10/2016+ for PG 16 would be unlikely to be a major problem.
Thanks to Michael for making that happen. That removes the main thing I didn't know how to deal with in this patch. Here's a rebase with some cleanup. With my garbage collector hat on, I see that all systems we target have fdatasync(), except: 1. Windows, but this patch supplies src/port/fdatasync.c. 2. DragonflyBSD before 6.1. We have 6.0 in the build farm. 3. Ancient macOS. Current releases have it, though we have to cope with a missing declaration. >From a standards point of view, fdatasync() is issue 5 POSIX like fsync(). Both are optional, but, being a database, we require fsync(), and they're both covered by the same POSIX option "Synchronized Input and Output". My plan now is to commit this patch so that problem #1 is solved, prod conchuela's owner to upgrade to solve #2, and wait until Tom shuts down prairiedog to solve #3. Then we could consider removing the HAVE_FDATASYNC probe and associated #ifdefs when convenient. For that reason, I'm not too bothered about the slight weirdness of defining HAVE_FDATASYNC on Windows even though that doesn't come from configure; it'd hopefully be short-lived. Better ideas welcome, though. Does that make sense?
From a985d52d1870ccfb92fda3316158242ce2ba3fe8 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Sun, 12 Dec 2021 14:13:35 +1300 Subject: [PATCH v3 1/2] Fix treatment of direct I/O in pg_test_fsync. pg_test_fsync traditionally enabled O_DIRECT for the open_datasync and open_sync tests. In fact current releases of PostgreSQL only do that if wal_level=minimal (less commonly used). So, run the test both ways. Discussion: https://postgr.es/m/CA%2BhUKGJZJVO%3DiX%2Beb-PXi2_XS9ZRqnn_4URh0NUQOwt6-_51xQ%40mail.gmail.com --- src/bin/pg_test_fsync/pg_test_fsync.c | 75 ++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c index f7bc199a30..109ccba819 100644 --- a/src/bin/pg_test_fsync/pg_test_fsync.c +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -294,14 +294,45 @@ test_sync(int writes_per_op) printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K); printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n")); +/* + * Test open_datasync with O_DIRECT if available + */ + printf(LABEL_FORMAT, "open_datasync (direct)"); + fflush(stdout); + +#if defined(OPEN_DATASYNC_FLAG) && (defined(O_DIRECT) || defined(F_NOCACHE)) + if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1) + { + printf(NA_FORMAT, _("n/a*")); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, _("n/a")); +#endif + /* - * Test open_datasync if available + * Test open_datasync buffered if available */ - printf(LABEL_FORMAT, "open_datasync"); + printf(LABEL_FORMAT, "open_datasync (buffered)"); fflush(stdout); #ifdef OPEN_DATASYNC_FLAG - if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1) + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1) { printf(NA_FORMAT, _("n/a*")); fs_warning = true; @@ -402,12 +433,12 @@ test_sync(int writes_per_op) #endif /* - * Test open_sync if available + * Test open_sync with O_DIRECT if available */ - printf(LABEL_FORMAT, "open_sync"); + printf(LABEL_FORMAT, "open_sync (direct)"); fflush(stdout); -#ifdef OPEN_SYNC_FLAG +#if defined(OPEN_SYNC_FLAG) && (defined(O_DIRECT) || defined(F_NOCACHE)) if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1) { printf(NA_FORMAT, _("n/a*")); @@ -439,6 +470,38 @@ test_sync(int writes_per_op) printf(NA_FORMAT, _("n/a")); #endif +/* + * Test open_sync if available + */ + printf(LABEL_FORMAT, "open_sync (buffered)"); + fflush(stdout); + +#ifdef OPEN_SYNC_FLAG + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1) + { + printf(NA_FORMAT, _("n/a*")); + fs_warning = true; + } + else + { + START_TIMER; + for (ops = 0; alarm_triggered == false; ops++) + { + for (writes = 0; writes < writes_per_op; writes++) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + die("write failed"); + } + STOP_TIMER; + close(tmpfile); + } +#else + printf(NA_FORMAT, _("n/a")); +#endif + + if (fs_warning) { printf(_("* This file system and its mount options do not support direct\n" -- 2.35.1
From 56882f910e58f72987433c72c32fc6eedc1cbac9 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Sat, 5 Feb 2022 09:15:47 +1300 Subject: [PATCH v3 2/2] Add wal_sync_method=fdatasync for Windows. Windows 10 gained support for flushing files with fdatasync() semantics. The main advantage over wal_sync_method=open_datasync is that the latter does not flush SATA drive caches. According to OS documentation FLUSH_FLAGS_FILE_DATA_SYNC_ONLY works only on NTFS filesystems, so we can't consider making it the default. Discussion: https://postgr.es/m/CA%2BhUKGJZJVO%3DiX%2Beb-PXi2_XS9ZRqnn_4URh0NUQOwt6-_51xQ%40mail.gmail.com --- configure | 6 ++++ configure.ac | 1 + doc/src/sgml/wal.sgml | 3 +- src/include/c.h | 2 +- src/include/port/win32_port.h | 6 ++++ src/include/port/win32ntdll.h | 10 ++++++- src/port/fdatasync.c | 53 +++++++++++++++++++++++++++++++++++ src/port/win32ntdll.c | 6 +++- src/tools/msvc/Mkvcbuild.pm | 3 +- src/tools/msvc/Solution.pm | 2 +- 10 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 src/port/fdatasync.c diff --git a/configure b/configure index 1e63c6862b..a03339301d 100755 --- a/configure +++ b/configure @@ -16980,6 +16980,12 @@ fi ;; esac + case " $LIBOBJS " in + *" fdatasync.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS fdatasync.$ac_objext" + ;; +esac + case " $LIBOBJS " in *" kill.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS kill.$ac_objext" diff --git a/configure.ac b/configure.ac index 71191f14ad..9528f7fb42 100644 --- a/configure.ac +++ b/configure.ac @@ -1975,6 +1975,7 @@ if test "$PORTNAME" = "win32"; then AC_CHECK_FUNCS(_configthreadlocale) AC_REPLACE_FUNCS(gettimeofday) AC_LIBOBJ(dirmod) + AC_LIBOBJ(fdatasync) AC_LIBOBJ(kill) AC_LIBOBJ(open) AC_LIBOBJ(system) diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 4b6ef283c1..01f7379ebb 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -108,7 +108,8 @@ <literal>open_datasync</literal> (the default), write caching can be disabled by unchecking <literal>My Computer\Open\<replaceable>disk drive</replaceable>\Properties\Hardware\Properties\Policies\Enable write caching on the disk</literal>. Alternatively, set <varname>wal_sync_method</varname> to - <literal>fsync</literal> or <literal>fsync_writethrough</literal>, which prevent + <literal>fdatasync</literal> (NTFS only), <literal>fsync</literal> or + <literal>fsync_writethrough</literal>, which prevent write caching. </para> </listitem> diff --git a/src/include/c.h b/src/include/c.h index 863a16c6a6..fdf89e2742 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -1290,7 +1290,7 @@ typedef union PGAlignedXLogBlock * standard C library. */ -#if defined(HAVE_FDATASYNC) && !HAVE_DECL_FDATASYNC +#if !HAVE_DECL_FDATASYNC extern int fdatasync(int fildes); #endif diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 5121c0c626..5ea66528fa 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -83,6 +83,12 @@ #define HAVE_FSYNC_WRITETHROUGH #define FSYNC_WRITETHROUGH_IS_FSYNC +/* + * We have a replacement for fdatasync() in src/port/fdatasync.c, which is + * unconditionally used by MSVC and Mingw builds. + */ +#define HAVE_FDATASYNC + #define USES_WINSOCK /* diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h index 291b067ea4..34cebddd54 100644 --- a/src/include/port/win32ntdll.h +++ b/src/include/port/win32ntdll.h @@ -23,9 +23,17 @@ #include <ntstatus.h> #include <winternl.h> -typedef NTSTATUS (__stdcall * RtlGetLastNtStatus_t) (void); +#ifndef FLUSH_FLAGS_FILE_DATA_SYNC_ONLY +#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x4 +#endif + +typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t) (void); +typedef ULONG (__stdcall *RtlNtStatusToDosError_t) (NTSTATUS); +typedef NTSTATUS (__stdcall *NtFlushBuffersFileEx_t) (HANDLE, ULONG, PVOID, ULONG, PIO_STATUS_BLOCK); extern PGDLLIMPORT RtlGetLastNtStatus_t pg_RtlGetLastNtStatus; +extern PGDLLIMPORT RtlNtStatusToDosError_t pg_RtlNtStatusToDosError; +extern PGDLLIMPORT NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx; extern int initialize_ntdll(void); diff --git a/src/port/fdatasync.c b/src/port/fdatasync.c new file mode 100644 index 0000000000..afef853aa3 --- /dev/null +++ b/src/port/fdatasync.c @@ -0,0 +1,53 @@ +/*------------------------------------------------------------------------- + * + * fdatasync.c + * Win32 fdatasync() replacement + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * + * src/port/fdatasync.c + * + *------------------------------------------------------------------------- + */ + +#define UMDF_USING_NTSTATUS + +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#include "port/win32ntdll.h" + +int +fdatasync(int fd) +{ + IO_STATUS_BLOCK iosb; + NTSTATUS status; + HANDLE handle; + + handle = (HANDLE) _get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) + { + errno = EBADF; + return -1; + } + + if (initialize_ntdll() < 0) + return -1; + + memset(&iosb, 0, sizeof(iosb)); + status = pg_NtFlushBuffersFileEx(handle, + FLUSH_FLAGS_FILE_DATA_SYNC_ONLY, + NULL, + 0, + &iosb); + + if (NT_SUCCESS(status)) + return 0; + + _dosmaperr(pg_RtlNtStatusToDosError(status)); + return -1; +} diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c index 10c33c6a01..eb61407754 100644 --- a/src/port/win32ntdll.c +++ b/src/port/win32ntdll.c @@ -20,6 +20,8 @@ #include "port/win32ntdll.h" RtlGetLastNtStatus_t pg_RtlGetLastNtStatus; +RtlNtStatusToDosError_t pg_RtlNtStatusToDosError; +NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx; typedef struct NtDllRoutine { @@ -28,7 +30,9 @@ typedef struct NtDllRoutine } NtDllRoutine; static const NtDllRoutine routines[] = { - {"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus} + {"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus}, + {"RtlNtStatusToDosError", (pg_funcptr_t *) &pg_RtlNtStatusToDosError}, + {"NtFlushBuffersFileEx", (pg_funcptr_t *) &pg_NtFlushBuffersFileEx} }; static bool initialized; diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index e4feda10fd..cc7a908d10 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -99,7 +99,8 @@ sub mkvcbuild $solution = CreateSolution($vsVersion, $config); our @pgportfiles = qw( - chklocale.c explicit_bzero.c fls.c getpeereid.c getrusage.c inet_aton.c + chklocale.c explicit_bzero.c fls.c fdatasync.c + getpeereid.c getrusage.c inet_aton.c getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c dirent.c dlopen.c getopt.c getopt_long.c link.c diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index fa32dc371d..c87bca8e6a 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -257,7 +257,7 @@ sub GenerateFiles HAVE_EDITLINE_READLINE_H => undef, HAVE_EXECINFO_H => undef, HAVE_EXPLICIT_BZERO => undef, - HAVE_FDATASYNC => undef, + HAVE_FDATASYNC => 1, HAVE_FLS => undef, HAVE_FSEEKO => 1, HAVE_FUNCNAME__FUNC => undef, -- 2.35.1