On Fri, Apr 8, 2022 at 7:56 PM Dave Page <dp...@pgadmin.org> wrote:
> Windows 8 was a pretty unpopular release, so I would expect shifting to 
> 10/2016+ for PG 16 would be unlikely to be a major problem.

Thanks to Michael for making that happen.  That removes the main thing
I didn't know how to deal with in this patch.  Here's a rebase with
some cleanup.

With my garbage collector hat on, I see that all systems we target
have fdatasync(), except:

1.  Windows, but this patch supplies src/port/fdatasync.c.
2.  DragonflyBSD before 6.1.  We have 6.0 in the build farm.
3.  Ancient macOS.  Current releases have it, though we have to cope
with a missing declaration.

>From a standards point of view, fdatasync() is issue 5 POSIX like
fsync().  Both are optional, but, being a database, we require
fsync(), and they're both covered by the same POSIX option
"Synchronized Input and Output".

My plan now is to commit this patch so that problem #1 is solved, prod
conchuela's owner to upgrade to solve #2, and wait until Tom shuts
down prairiedog to solve #3.  Then we could consider removing the
HAVE_FDATASYNC probe and associated #ifdefs when convenient.  For that
reason, I'm not too bothered about the slight weirdness of defining
HAVE_FDATASYNC on Windows even though that doesn't come from
configure; it'd hopefully be short-lived.  Better ideas welcome,
though.  Does that make sense?
From a985d52d1870ccfb92fda3316158242ce2ba3fe8 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sun, 12 Dec 2021 14:13:35 +1300
Subject: [PATCH v3 1/2] Fix treatment of direct I/O in pg_test_fsync.

pg_test_fsync traditionally enabled O_DIRECT for the open_datasync and
open_sync tests.  In fact current releases of PostgreSQL only do that if
wal_level=minimal (less commonly used).  So, run the test both ways.

Discussion: https://postgr.es/m/CA%2BhUKGJZJVO%3DiX%2Beb-PXi2_XS9ZRqnn_4URh0NUQOwt6-_51xQ%40mail.gmail.com
---
 src/bin/pg_test_fsync/pg_test_fsync.c | 75 ++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c
index f7bc199a30..109ccba819 100644
--- a/src/bin/pg_test_fsync/pg_test_fsync.c
+++ b/src/bin/pg_test_fsync/pg_test_fsync.c
@@ -294,14 +294,45 @@ test_sync(int writes_per_op)
 		printf(_("\nCompare file sync methods using two %dkB writes:\n"), XLOG_BLCKSZ_K);
 	printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n"));
 
+/*
+ * Test open_datasync with O_DIRECT if available
+ */
+	printf(LABEL_FORMAT, "open_datasync (direct)");
+	fflush(stdout);
+
+#if defined(OPEN_DATASYNC_FLAG) && (defined(O_DIRECT) || defined(F_NOCACHE))
+	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
+	{
+		printf(NA_FORMAT, _("n/a*"));
+		fs_warning = true;
+	}
+	else
+	{
+		START_TIMER;
+		for (ops = 0; alarm_triggered == false; ops++)
+		{
+			for (writes = 0; writes < writes_per_op; writes++)
+				if (pg_pwrite(tmpfile,
+							  buf,
+							  XLOG_BLCKSZ,
+							  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
+					die("write failed");
+		}
+		STOP_TIMER;
+		close(tmpfile);
+	}
+#else
+	printf(NA_FORMAT, _("n/a"));
+#endif
+
 	/*
-	 * Test open_datasync if available
+	 * Test open_datasync buffered if available
 	 */
-	printf(LABEL_FORMAT, "open_datasync");
+	printf(LABEL_FORMAT, "open_datasync (buffered)");
 	fflush(stdout);
 
 #ifdef OPEN_DATASYNC_FLAG
-	if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
+	if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
 		fs_warning = true;
@@ -402,12 +433,12 @@ test_sync(int writes_per_op)
 #endif
 
 /*
- * Test open_sync if available
+ * Test open_sync with O_DIRECT if available
  */
-	printf(LABEL_FORMAT, "open_sync");
+	printf(LABEL_FORMAT, "open_sync (direct)");
 	fflush(stdout);
 
-#ifdef OPEN_SYNC_FLAG
+#if defined(OPEN_SYNC_FLAG) && (defined(O_DIRECT) || defined(F_NOCACHE))
 	if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
 	{
 		printf(NA_FORMAT, _("n/a*"));
@@ -439,6 +470,38 @@ test_sync(int writes_per_op)
 	printf(NA_FORMAT, _("n/a"));
 #endif
 
+/*
+ * Test open_sync if available
+ */
+	printf(LABEL_FORMAT, "open_sync (buffered)");
+	fflush(stdout);
+
+#ifdef OPEN_SYNC_FLAG
+	if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
+	{
+		printf(NA_FORMAT, _("n/a*"));
+		fs_warning = true;
+	}
+	else
+	{
+		START_TIMER;
+		for (ops = 0; alarm_triggered == false; ops++)
+		{
+			for (writes = 0; writes < writes_per_op; writes++)
+				if (pg_pwrite(tmpfile,
+							  buf,
+							  XLOG_BLCKSZ,
+							  writes * XLOG_BLCKSZ) != XLOG_BLCKSZ)
+					die("write failed");
+		}
+		STOP_TIMER;
+		close(tmpfile);
+	}
+#else
+	printf(NA_FORMAT, _("n/a"));
+#endif
+
+
 	if (fs_warning)
 	{
 		printf(_("* This file system and its mount options do not support direct\n"
-- 
2.35.1

From 56882f910e58f72987433c72c32fc6eedc1cbac9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 5 Feb 2022 09:15:47 +1300
Subject: [PATCH v3 2/2] Add wal_sync_method=fdatasync for Windows.

Windows 10 gained support for flushing files with fdatasync() semantics.
The main advantage over wal_sync_method=open_datasync is that the latter
does not flush SATA drive caches.

According to OS documentation FLUSH_FLAGS_FILE_DATA_SYNC_ONLY works
only on NTFS filesystems, so we can't consider making it the default.

Discussion: https://postgr.es/m/CA%2BhUKGJZJVO%3DiX%2Beb-PXi2_XS9ZRqnn_4URh0NUQOwt6-_51xQ%40mail.gmail.com
---
 configure                     |  6 ++++
 configure.ac                  |  1 +
 doc/src/sgml/wal.sgml         |  3 +-
 src/include/c.h               |  2 +-
 src/include/port/win32_port.h |  6 ++++
 src/include/port/win32ntdll.h | 10 ++++++-
 src/port/fdatasync.c          | 53 +++++++++++++++++++++++++++++++++++
 src/port/win32ntdll.c         |  6 +++-
 src/tools/msvc/Mkvcbuild.pm   |  3 +-
 src/tools/msvc/Solution.pm    |  2 +-
 10 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 src/port/fdatasync.c

diff --git a/configure b/configure
index 1e63c6862b..a03339301d 100755
--- a/configure
+++ b/configure
@@ -16980,6 +16980,12 @@ fi
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" fdatasync.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS fdatasync.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" kill.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS kill.$ac_objext"
diff --git a/configure.ac b/configure.ac
index 71191f14ad..9528f7fb42 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1975,6 +1975,7 @@ if test "$PORTNAME" = "win32"; then
   AC_CHECK_FUNCS(_configthreadlocale)
   AC_REPLACE_FUNCS(gettimeofday)
   AC_LIBOBJ(dirmod)
+  AC_LIBOBJ(fdatasync)
   AC_LIBOBJ(kill)
   AC_LIBOBJ(open)
   AC_LIBOBJ(system)
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 4b6ef283c1..01f7379ebb 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -108,7 +108,8 @@
         <literal>open_datasync</literal> (the default), write caching can be disabled
         by unchecking <literal>My Computer\Open\<replaceable>disk drive</replaceable>\Properties\Hardware\Properties\Policies\Enable write caching on the disk</literal>.
         Alternatively, set <varname>wal_sync_method</varname> to
-        <literal>fsync</literal> or <literal>fsync_writethrough</literal>, which prevent
+        <literal>fdatasync</literal> (NTFS only), <literal>fsync</literal> or
+        <literal>fsync_writethrough</literal>, which prevent
         write caching.
       </para>
     </listitem>
diff --git a/src/include/c.h b/src/include/c.h
index 863a16c6a6..fdf89e2742 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -1290,7 +1290,7 @@ typedef union PGAlignedXLogBlock
  * standard C library.
  */
 
-#if defined(HAVE_FDATASYNC) && !HAVE_DECL_FDATASYNC
+#if !HAVE_DECL_FDATASYNC
 extern int	fdatasync(int fildes);
 #endif
 
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 5121c0c626..5ea66528fa 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -83,6 +83,12 @@
 #define HAVE_FSYNC_WRITETHROUGH
 #define FSYNC_WRITETHROUGH_IS_FSYNC
 
+/*
+ * We have a replacement for fdatasync() in src/port/fdatasync.c, which is
+ * unconditionally used by MSVC and Mingw builds.
+ */
+#define HAVE_FDATASYNC
+
 #define USES_WINSOCK
 
 /*
diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h
index 291b067ea4..34cebddd54 100644
--- a/src/include/port/win32ntdll.h
+++ b/src/include/port/win32ntdll.h
@@ -23,9 +23,17 @@
 #include <ntstatus.h>
 #include <winternl.h>
 
-typedef NTSTATUS (__stdcall * RtlGetLastNtStatus_t) (void);
+#ifndef FLUSH_FLAGS_FILE_DATA_SYNC_ONLY
+#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x4
+#endif
+
+typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t) (void);
+typedef ULONG (__stdcall *RtlNtStatusToDosError_t) (NTSTATUS);
+typedef NTSTATUS (__stdcall *NtFlushBuffersFileEx_t) (HANDLE, ULONG, PVOID, ULONG, PIO_STATUS_BLOCK);
 
 extern PGDLLIMPORT RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+extern PGDLLIMPORT RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+extern PGDLLIMPORT NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
 
 extern int	initialize_ntdll(void);
 
diff --git a/src/port/fdatasync.c b/src/port/fdatasync.c
new file mode 100644
index 0000000000..afef853aa3
--- /dev/null
+++ b/src/port/fdatasync.c
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * fdatasync.c
+ *	   Win32 fdatasync() replacement
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ *
+ * src/port/fdatasync.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define UMDF_USING_NTSTATUS
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#include "port/win32ntdll.h"
+
+int
+fdatasync(int fd)
+{
+	IO_STATUS_BLOCK iosb;
+	NTSTATUS	status;
+	HANDLE		handle;
+
+	handle = (HANDLE) _get_osfhandle(fd);
+	if (handle == INVALID_HANDLE_VALUE)
+	{
+		errno = EBADF;
+		return -1;
+	}
+
+	if (initialize_ntdll() < 0)
+		return -1;
+
+	memset(&iosb, 0, sizeof(iosb));
+	status = pg_NtFlushBuffersFileEx(handle,
+									 FLUSH_FLAGS_FILE_DATA_SYNC_ONLY,
+									 NULL,
+									 0,
+									 &iosb);
+
+	if (NT_SUCCESS(status))
+		return 0;
+
+	_dosmaperr(pg_RtlNtStatusToDosError(status));
+	return -1;
+}
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
index 10c33c6a01..eb61407754 100644
--- a/src/port/win32ntdll.c
+++ b/src/port/win32ntdll.c
@@ -20,6 +20,8 @@
 #include "port/win32ntdll.h"
 
 RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
 
 typedef struct NtDllRoutine
 {
@@ -28,7 +30,9 @@ typedef struct NtDllRoutine
 } NtDllRoutine;
 
 static const NtDllRoutine routines[] = {
-	{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus}
+	{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus},
+	{"RtlNtStatusToDosError", (pg_funcptr_t *) &pg_RtlNtStatusToDosError},
+	{"NtFlushBuffersFileEx", (pg_funcptr_t *) &pg_NtFlushBuffersFileEx}
 };
 
 static bool initialized;
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index e4feda10fd..cc7a908d10 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -99,7 +99,8 @@ sub mkvcbuild
 	$solution = CreateSolution($vsVersion, $config);
 
 	our @pgportfiles = qw(
-	  chklocale.c explicit_bzero.c fls.c getpeereid.c getrusage.c inet_aton.c
+	  chklocale.c explicit_bzero.c fls.c fdatasync.c
+	  getpeereid.c getrusage.c inet_aton.c
 	  getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
 	  snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
 	  dirent.c dlopen.c getopt.c getopt_long.c link.c
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index fa32dc371d..c87bca8e6a 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -257,7 +257,7 @@ sub GenerateFiles
 		HAVE_EDITLINE_READLINE_H                    => undef,
 		HAVE_EXECINFO_H                             => undef,
 		HAVE_EXPLICIT_BZERO                         => undef,
-		HAVE_FDATASYNC                              => undef,
+		HAVE_FDATASYNC                              => 1,
 		HAVE_FLS                                    => undef,
 		HAVE_FSEEKO                                 => 1,
 		HAVE_FUNCNAME__FUNC                         => undef,
-- 
2.35.1

Reply via email to