The cfbot showed issues compiling on linux and windows.
http://cfbot.cputube.org/takashi-menjo.html

https://cirrus-ci.com/task/6125740327436288
[02:30:06.538] In file included from xlog.c:38:
[02:30:06.538] ../../../../src/include/access/xlogpmem.h:32:42: error: unknown 
type name ‘tli’
[02:30:06.538]    32 | PmemXLogEnsurePrevMapped(XLogRecPtr ptr, tli)
[02:30:06.538]       |                                          ^~~
[02:30:06.538] xlog.c: In function ‘GetXLogBuffer’:
[02:30:06.538] xlog.c:1959:19: warning: implicit declaration of function 
‘PmemXLogEnsurePrevMapped’ [-Wimplicit-function-declaration]
[02:30:06.538]  1959 |    openLogSegNo = PmemXLogEnsurePrevMapped(endptr, tli);

https://cirrus-ci.com/task/6688690280857600?logs=build#L379
[02:33:25.752] c:\cirrus\src\include\access\xlogpmem.h(33,1): error C2081: 
'tli': name in formal parameter list illegal (compiling source file 
src/backend/access/transam/xlog.c) [c:\cirrus\postgres.vcxproj]

I'm attaching a probable fix.  Unfortunately, for patches like this, most of
the functionality isn't exercised unless the library is installed and the
feature is enabled by default at both compile time and run time.

In 0009: recaluculated => recalculated

0010-Update-document should be squashed with 0003-Add-wal_pmem_map-to-GUC (and
maybe 0002 and 0001).  I believe the patches after 0005 are more WIP, so it's
fine if they're not squashed yet.  I'm not sure what the point is of this one:
0008-Let-wal_pmem_map-be-constant-unl

+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not pmem_map_file \"%s\": %m", 
path)));

=> The outer parentheses are not needed since e3a87b4.
>From e5614f2ea3ff6aaf016343f81f74366440e18f6f Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Tue, 23 Mar 2021 13:32:27 +0900
Subject: [PATCH 01/13] Add --with-libpmem option for PMEM support

---
 configure                  | 99 ++++++++++++++++++++++++++++++++++++++
 configure.ac               | 17 +++++++
 src/include/pg_config.h.in |  6 +++
 3 files changed, 122 insertions(+)

diff --git a/configure b/configure
index 3b19105328d..22c364fac4f 100755
--- a/configure
+++ b/configure
@@ -699,6 +699,7 @@ with_gnu_ld
 LD
 LDFLAGS_SL
 LDFLAGS_EX
+with_libpmem
 LZ4_LIBS
 LZ4_CFLAGS
 with_lz4
@@ -868,6 +869,7 @@ with_libxslt
 with_system_tzdata
 with_zlib
 with_lz4
+with_libpmem
 with_gnu_ld
 with_ssl
 with_openssl
@@ -1576,6 +1578,7 @@ Optional Packages:
                           use system time zone data in DIR
   --without-zlib          do not use Zlib
   --with-lz4              build with LZ4 support
+  --with-libpmem          build with PMEM support
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
   --with-ssl=LIB          use LIB for SSL/TLS support (openssl)
   --with-openssl          obsolete spelling of --with-ssl=openssl
@@ -9033,6 +9036,41 @@ fi
   done
 fi
 
+#
+# libpmem
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with PMEM support" >&5
+$as_echo_n "checking whether to build with PMEM support... " >&6; }
+
+
+
+# Check whether --with-libpmem was given.
+if test "${with_libpmem+set}" = set; then :
+  withval=$with_libpmem;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_LIBPMEM 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-libpmem option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_libpmem=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_libpmem" >&5
+$as_echo "$with_libpmem" >&6; }
+
+
 #
 # Assignments
 #
@@ -13504,6 +13542,56 @@ fi
 fi
 
 
+if test "$with_libpmem" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pmem_memcpy in -lpmem" >&5
+$as_echo_n "checking for pmem_memcpy in -lpmem... " >&6; }
+if ${ac_cv_lib_pmem_pmem_memcpy+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpmem  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pmem_memcpy ();
+int
+main ()
+{
+return pmem_memcpy ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_pmem_pmem_memcpy=yes
+else
+  ac_cv_lib_pmem_pmem_memcpy=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pmem_pmem_memcpy" >&5
+$as_echo "$ac_cv_lib_pmem_pmem_memcpy" >&6; }
+if test "x$ac_cv_lib_pmem_pmem_memcpy" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBPMEM 1
+_ACEOF
+
+  LIBS="-lpmem $LIBS"
+
+else
+  as_fn_error $? "library 'libpmem' (version >= 1.5) is required for PMEM support" "$LINENO" 5
+fi
+
+fi
+
 
 ##
 ## Header files
@@ -14215,6 +14303,17 @@ fi
 
 done
 
+fi
+
+if test "$with_libpmem" = yes ; then
+  ac_fn_c_check_header_mongrel "$LINENO" "libpmem.h" "ac_cv_header_libpmem_h" "$ac_includes_default"
+if test "x$ac_cv_header_libpmem_h" = xyes; then :
+
+else
+  as_fn_error $? "header file <libpmem.h> is required for PMEM support" "$LINENO" 5
+fi
+
+
 fi
 
 ##
diff --git a/configure.ac b/configure.ac
index e77d4dcf2d2..2bea131375a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1056,6 +1056,15 @@ if test "$with_lz4" = yes; then
   done
 fi
 
+#
+# libpmem
+#
+AC_MSG_CHECKING([whether to build with PMEM support])
+PGAC_ARG_BOOL(with, libpmem, no, [build with PMEM support],
+              [AC_DEFINE([USE_LIBPMEM], 1, [Define to 1 to build with PMEM support. (--with-libpmem)])])
+AC_MSG_RESULT([$with_libpmem])
+AC_SUBST(with_libpmem)
+
 #
 # Assignments
 #
@@ -1385,6 +1394,10 @@ elif test "$with_uuid" = ossp ; then
 fi
 AC_SUBST(UUID_LIBS)
 
+if test "$with_libpmem" = yes; then
+  AC_CHECK_LIB(pmem, pmem_memcpy, [], [AC_MSG_ERROR([library 'libpmem' (version >= 1.5) is required for PMEM support])])
+fi
+
 
 ##
 ## Header files
@@ -1571,6 +1584,10 @@ if test "$PORTNAME" = "win32" ; then
    AC_CHECK_HEADERS(crtdefs.h)
 fi
 
+if test "$with_libpmem" = yes ; then
+  AC_CHECK_HEADER(libpmem.h, [], [AC_MSG_ERROR([header file <libpmem.h> is required for PMEM support])])
+fi
+
 ##
 ## Types, structures, compiler characteristics
 ##
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 7525c165974..96604aa130d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -331,6 +331,9 @@
 /* Define to 1 if you have the `pam' library (-lpam). */
 #undef HAVE_LIBPAM
 
+/* Define to 1 if you have the `pmem' library (-lpmem). */
+#undef HAVE_LIBPMEM
+
 /* Define if you have a function readline library */
 #undef HAVE_LIBREADLINE
 
@@ -898,6 +901,9 @@
 /* Define to 1 to build with LDAP support. (--with-ldap) */
 #undef USE_LDAP
 
+/* Define to 1 to build with PMEM support. (--with-libpmem) */
+#undef USE_LIBPMEM
+
 /* Define to 1 to build with XML support. (--with-libxml) */
 #undef USE_LIBXML
 
-- 
2.17.1

>From f014b7923e9cee1f1f3cfd3ea23fc91ace3f1474 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Fri, 5 Nov 2021 14:16:33 +0900
Subject: [PATCH 02/13] Support build with MSVC on Windows

---
 src/tools/msvc/Solution.pm       | 13 +++++++++++++
 src/tools/msvc/config_default.pl |  3 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 2c8cd521e94..d843391050b 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -304,6 +304,7 @@ sub GenerateFiles
 		HAVE_LIBLZ4                                 => undef,
 		HAVE_LIBM                                   => undef,
 		HAVE_LIBPAM                                 => undef,
+		HAVE_LIBPMEM                                => undef,
 		HAVE_LIBREADLINE                            => undef,
 		HAVE_LIBSELINUX                             => undef,
 		HAVE_LIBSSL                                 => undef,
@@ -488,6 +489,7 @@ sub GenerateFiles
 		USE_BONJOUR         => undef,
 		USE_BSD_AUTH        => undef,
 		USE_ICU => $self->{options}->{icu} ? 1 : undef,
+		USE_LIBPMEM                => undef,
 		USE_LIBXML                 => undef,
 		USE_LIBXSLT                => undef,
 		USE_LZ4                    => undef,
@@ -538,6 +540,11 @@ sub GenerateFiles
 		$define{HAVE_LZ4_H}  = 1;
 		$define{USE_LZ4}     = 1;
 	}
+	if ($self->{options}->{pmem})
+	{
+		$define{HAVE_LIBPMEM} = 1;
+		$define{USE_LIBPMEM}  = 1;
+	}
 	if ($self->{options}->{openssl})
 	{
 		$define{USE_OPENSSL} = 1;
@@ -1085,6 +1092,11 @@ sub AddProject
 		$proj->AddIncludeDir($self->{options}->{uuid} . '\include');
 		$proj->AddLibrary($self->{options}->{uuid} . '\lib\uuid.lib');
 	}
+	if ($self->{options}->{pmem})
+	{
+		$proj->AddIncludeDir($self->{options}->{pmem} . '\include');
+		$proj->AddLibrary($self->{options}->{pmem} . '\lib\libpmem.lib');
+	}
 	return $proj;
 }
 
@@ -1197,6 +1209,7 @@ sub GetFakeConfigure
 	$cfg .= ' --with-tcl'           if ($self->{options}->{tcl});
 	$cfg .= ' --with-perl'          if ($self->{options}->{perl});
 	$cfg .= ' --with-python'        if ($self->{options}->{python});
+	$cfg .= ' --with-libpmem'       if ($self->{options}->{pmem});
 	my $port = $self->{options}->{'--with-pgport'};
 	$cfg .= " --with-pgport=$port" if defined($port);
 
diff --git a/src/tools/msvc/config_default.pl b/src/tools/msvc/config_default.pl
index 460c0375d4b..774730c9a8f 100644
--- a/src/tools/msvc/config_default.pl
+++ b/src/tools/msvc/config_default.pl
@@ -25,7 +25,8 @@ our $config = {
 	xml       => undef,    # --with-libxml=<path>
 	xslt      => undef,    # --with-libxslt=<path>
 	iconv     => undef,    # (not in configure, path to iconv)
-	zlib      => undef     # --with-zlib=<path>
+	zlib      => undef,    # --with-zlib=<path>
+	pmem      => undef     # --with-libpmem=<path>
 };
 
 1;
-- 
2.17.1

>From 7c2d6665a925dc5615f6ffd999374e10c3ea2199 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Thu, 11 Mar 2021 17:55:53 +0900
Subject: [PATCH 03/13] Add wal_pmem_map to GUC

---
 src/backend/access/transam/xlog.c | 51 ++++++++++++++++++++++++-------
 src/backend/utils/misc/guc.c      | 14 +++++++++
 src/include/access/xlog.h         |  1 +
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 87cd05c9454..02f63c31387 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -115,6 +115,7 @@ int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
 int			wal_retrieve_retry_interval = 5000;
 int			max_slot_wal_keep_size_mb = -1;
 bool		track_wal_io_timing = false;
+bool		wal_pmem_map = false;
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
@@ -5194,13 +5195,28 @@ XLOGShmemSize(void)
 {
 	Size		size;
 
+	/*
+	 * If we use WAL segment files as WAL buffers, we don't use the given
+	 * value of wal_buffers. Instead, we set it to the value based on the
+	 * segment size and the page size. This should be done before calculating
+	 * the size of xlblocks array.
+	 */
+	if (wal_pmem_map)
+	{
+		int			npages;
+		char		buf[32];
+
+		npages = wal_segment_size / XLOG_BLCKSZ;
+		snprintf(buf, sizeof(buf), "%d", (int) npages);
+		SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE);
+	}
 	/*
 	 * If the value of wal_buffers is -1, use the preferred auto-tune value.
 	 * This isn't an amazingly clean place to do this, but we must wait till
 	 * NBuffers has received its final value, and must do it before using the
 	 * value of XLOGbuffers to do anything important.
 	 */
-	if (XLOGbuffers == -1)
+	else if (XLOGbuffers == -1)
 	{
 		char		buf[32];
 
@@ -5216,10 +5232,17 @@ XLOGShmemSize(void)
 	size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
 	/* xlblocks array */
 	size = add_size(size, mul_size(sizeof(XLogRecPtr), XLOGbuffers));
-	/* extra alignment padding for XLOG I/O buffers */
-	size = add_size(size, XLOG_BLCKSZ);
-	/* and the buffers themselves */
-	size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+
+	/*
+	 * If we use WAL segment files as WAL buffers, we don't need volatile ones.
+	 */
+	if (!wal_pmem_map)
+	{
+		/* extra alignment padding for XLOG I/O buffers */
+		size = add_size(size, XLOG_BLCKSZ);
+		/* and the buffers themselves */
+		size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+	}
 
 	/*
 	 * Note: we don't count ControlFileData, it comes out of the "slop factor"
@@ -5313,13 +5336,19 @@ XLOGShmemInit(void)
 	}
 
 	/*
-	 * Align the start of the page buffers to a full xlog block size boundary.
-	 * This simplifies some calculations in XLOG insertion. It is also
-	 * required for O_DIRECT.
+	 * If we use WAL segment files as WAL buffers, we don't need volatile ones.
 	 */
-	allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
-	XLogCtl->pages = allocptr;
-	memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	if (!wal_pmem_map)
+	{
+		/*
+		 * Align the start of the page buffers to a full xlog block size boundary.
+		 * This simplifies some calculations in XLOG insertion. It is also
+		 * required for O_DIRECT.
+		 */
+		allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
+		XLogCtl->pages = allocptr;
+		memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	}
 
 	/*
 	 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f9504d3aec4..ee18a9cf338 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1344,6 +1344,20 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+#ifdef USE_LIBPMEM
+	{
+		{"wal_pmem_map", PGC_POSTMASTER, WAL_SETTINGS,
+			gettext_noop("Map WAL segment files on PMEM as WAL buffers."),
+			gettext_noop("If true, postgres will memory-map WAL segment files "
+						 "on PMEM to use them as WAL buffers instead of the "
+						 "traditional volatile ones."),
+		},
+		&wal_pmem_map,
+		false,
+		NULL, NULL, NULL
+	},
+#endif
+
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs each checkpoint."),
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 34f6c89f067..73900cbc9e7 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -88,6 +88,7 @@ extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
 extern bool track_wal_io_timing;
+extern bool wal_pmem_map;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
-- 
2.17.1

>From a09110f5932cac18d7822d296d189de47698f70a Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Tue, 23 Mar 2021 07:32:05 +0900
Subject: [PATCH 04/13] Export InstallXLogFileSegment

---
 src/backend/access/transam/xlog.c  | 5 +----
 src/include/access/xlog_internal.h | 4 ++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 02f63c31387..73a3477be04 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -923,9 +923,6 @@ static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli,
 								  bool opportunistic);
 static bool XLogCheckpointNeeded(XLogSegNo new_segno);
 static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible);
-static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
-								   bool find_free, XLogSegNo max_segno,
-								   TimeLineID tli);
 static int	XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
 						 XLogSource source, bool notfoundOk);
 static int	XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source);
@@ -3700,7 +3697,7 @@ XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno,
  * max_segno limit was exceeded, the startup process has disabled this
  * function for now, or an error occurred while renaming the file into place.
  */
-static bool
+bool
 InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
 					   bool find_free, XLogSegNo max_segno, TimeLineID tli)
 {
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index c0da76cab49..328128b48d5 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -324,6 +324,10 @@ extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant);
 
 extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
 
+extern bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
+								   bool find_free, XLogSegNo max_segno,
+								   TimeLineID tli);
+
 /*
  * Exported for the functions in timeline.c and xlogarchive.c.  Only valid
  * in the startup process.
-- 
2.17.1

>From 443e47112de2d8c735be0ce9bddb01e7e77de672 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Tue, 23 Mar 2021 11:45:44 +0900
Subject: [PATCH 05/13] Map WAL segment files on PMEM as WAL buffers

Fixes introduced in patchset v2:
- Keep openLogSegNo even if wal_pmem_map=true
- Fix sync issue of PmemXLogCreate
- Fix unmapping issue of PmemXLogUnmap
- Remove unused XLogPageOffset
---
 src/backend/access/transam/Makefile   |   1 +
 src/backend/access/transam/xlog.c     | 153 +++++++++----
 src/backend/access/transam/xlogpmem.c | 297 ++++++++++++++++++++++++++
 src/include/access/xlogpmem.h         |  59 +++++
 4 files changed, 474 insertions(+), 36 deletions(-)
 create mode 100644 src/backend/access/transam/xlogpmem.c
 create mode 100644 src/include/access/xlogpmem.h

diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 595e02de722..3a29583bc03 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -31,6 +31,7 @@ OBJS = \
 	xlogarchive.o \
 	xlogfuncs.o \
 	xloginsert.o \
+	xlogpmem.o \
 	xlogreader.o \
 	xlogutils.o
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 73a3477be04..be56599f9fd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -35,6 +35,7 @@
 #include "access/xlog_internal.h"
 #include "access/xlogarchive.h"
 #include "access/xloginsert.h"
+#include "access/xlogpmem.h"
 #include "access/xlogreader.h"
 #include "access/xlogutils.h"
 #include "catalog/catversion.h"
@@ -2024,7 +2025,14 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
 	 * offset within the page.
 	 */
 	cachedPage = ptr / XLOG_BLCKSZ;
-	cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
+	if (wal_pmem_map)
+	{
+		openLogTLI = tli;
+		openLogSegNo = PmemXLogEnsurePrevMapped(endptr, tli);
+		cachedPos = PmemXLogGetBufferPages() + idx * (Size) XLOG_BLCKSZ;
+	}
+	else
+		cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
 
 	Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
 	Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
@@ -2258,7 +2266,14 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
 		Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
 
-		NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
+		if (wal_pmem_map)
+		{
+			openLogTLI = tli;
+			openLogSegNo = PmemXLogEnsurePrevMapped(NewPageEndPtr, tli);
+			NewPage = (XLogPageHeader) (PmemXLogGetBufferPages() + nextidx * (Size) XLOG_BLCKSZ);
+		}
+		else
+			NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
 
 		/*
 		 * Be sure to re-zero the buffer so that bytes beyond what we've
@@ -2477,6 +2492,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 	int			npages;
 	int			startidx;
 	uint32		startoffset;
+	bool		isfirstpage;
+	XLogRecPtr	startpageptr;
 
 	/* We should always be inside a critical section here */
 	Assert(CritSectionCount > 0);
@@ -2499,6 +2516,10 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 	startidx = 0;
 	startoffset = 0;
 
+	/* Those are used actually only if wal_pmem_map=true */
+	isfirstpage = true;
+	startpageptr = 0;
+
 	/*
 	 * Within the loop, curridx is the cache block index of the page to
 	 * consider writing.  Begin at the buffer containing the next unwritten
@@ -2524,33 +2545,36 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		LogwrtResult.Write = EndPtr;
 		ispartialpage = WriteRqst.Write < LogwrtResult.Write;
 
-		if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
-							 wal_segment_size))
+		if (!wal_pmem_map)
 		{
-			/*
-			 * Switch to new logfile segment.  We cannot have any pending
-			 * pages here (since we dump what we have at segment end).
-			 */
-			Assert(npages == 0);
-			if (openLogFile >= 0)
-				XLogFileClose();
-			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
-							wal_segment_size);
-			openLogTLI = tli;
+			if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
+								 wal_segment_size))
+			{
+				/*
+				 * Switch to new logfile segment.  We cannot have any pending
+				 * pages here (since we dump what we have at segment end).
+				 */
+				Assert(npages == 0);
+				if (openLogFile >= 0)
+					XLogFileClose();
+				XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
+								wal_segment_size);
+				openLogTLI = tli;
 
-			/* create/use new log file */
-			openLogFile = XLogFileInit(openLogSegNo, tli);
-			ReserveExternalFD();
-		}
+				/* create/use new log file */
+				openLogFile = XLogFileInit(openLogSegNo, tli);
+				ReserveExternalFD();
+			}
 
-		/* Make sure we have the current logfile open */
-		if (openLogFile < 0)
-		{
-			XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
-							wal_segment_size);
-			openLogTLI = tli;
-			openLogFile = XLogFileOpen(openLogSegNo, tli);
-			ReserveExternalFD();
+			/* Make sure we have the current logfile open */
+			if (openLogFile < 0)
+			{
+				XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
+								wal_segment_size);
+				openLogTLI = tli;
+				openLogFile = XLogFileOpen(openLogSegNo, tli);
+				ReserveExternalFD();
+			}
 		}
 
 		/* Add current page to the set of pending pages-to-dump */
@@ -2558,8 +2582,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		{
 			/* first of group */
 			startidx = curridx;
-			startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
-											wal_segment_size);
+			startpageptr = LogwrtResult.Write - XLOG_BLCKSZ;
+			startoffset = XLogSegmentOffset(startpageptr, wal_segment_size);
 		}
 		npages++;
 
@@ -2597,7 +2621,38 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 					INSTR_TIME_SET_CURRENT(start);
 
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
-				written = pg_pwrite(openLogFile, from, nleft, startoffset);
+
+				/*
+				 * If we use a WAL segment file as WAL buffers, we cache-flush
+				 * records on the buffers byte by byte, not page by page. To do
+				 * so, here we fix the range being cache-flushed.
+				 */
+				if (wal_pmem_map)
+				{
+					XLogRecPtr	startbyteptr;
+					XLogRecPtr	endbyteptr;
+
+					startbyteptr = (isfirstpage)
+								 ? XLogCtl->LogwrtResult.Write
+								 : startpageptr;
+
+					endbyteptr = (ispartialpage)
+							   ? WriteRqst.Write
+							   : LogwrtResult.Write;
+
+					/* Now we cache-flush records */
+					openLogTLI = tli;
+					openLogSegNo = PmemXLogEnsurePrevMapped(endbyteptr, tli);
+					PmemXLogFlush(startbyteptr, endbyteptr);
+
+					/* Mark the first page is consumed */
+					isfirstpage = false;
+
+					/* Tell all the "pages" have been written successfully */
+					written = nleft;
+				}
+				else
+					written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
 
 				/*
@@ -2655,7 +2710,10 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 			 */
 			if (finishing_seg)
 			{
-				issue_xlog_fsync(openLogFile, openLogSegNo, tli);
+				if (wal_pmem_map)
+					PmemXLogSync();
+				else
+					issue_xlog_fsync(openLogFile, openLogSegNo, tli);
 
 				/* signal that we need to wakeup walsenders later */
 				WalSndWakeupRequest();
@@ -2706,12 +2764,14 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		LogwrtResult.Flush < LogwrtResult.Write)
 
 	{
+		if (wal_pmem_map)
+			PmemXLogSync();
 		/*
 		 * Could get here without iterating above loop, in which case we might
 		 * have no open file or the wrong one.  However, we do not need to
 		 * fsync more than one file.
 		 */
-		if (sync_method != SYNC_METHOD_OPEN &&
+		else if (sync_method != SYNC_METHOD_OPEN &&
 			sync_method != SYNC_METHOD_OPEN_DSYNC)
 		{
 			if (openLogFile >= 0 &&
@@ -8099,11 +8159,32 @@ StartupXLOG(void)
 
 		firstIdx = XLogRecPtrToBufIdx(EndOfLog);
 
-		/* Copy the valid part of the last block, and zero the rest */
-		page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
-		len = EndOfLog % XLOG_BLCKSZ;
-		memcpy(page, xlogreader->readBuf, len);
-		memset(page + len, 0, XLOG_BLCKSZ - len);
+		if (wal_pmem_map)
+		{
+			/*
+			 * Keep the valid part of the last block, and zero the rest.
+			 * Note that "len" indicates the size of the valid part.
+			 *
+			 * TODO how about if (newTLI != replayTLI) ?
+			 */
+			openLogTLI = newTLI;
+			openLogSegNo = PmemXLogEnsurePrevMapped(EndOfLog, newTLI);
+			page = PmemXLogGetBufferPages() + firstIdx * (Size) XLOG_BLCKSZ;
+			len = EndOfLog % XLOG_BLCKSZ;
+			memset(page + len, 0, XLOG_BLCKSZ - len);
+
+			/* Cache-flush and sync now */
+			PmemXLogFlush(EndOfLog, pageBeginPtr + XLOG_BLCKSZ);
+			PmemXLogSync();
+		}
+		else
+		{
+			/* Copy the valid part of the last block, and zero the rest */
+			page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
+			len = EndOfLog % XLOG_BLCKSZ;
+			memcpy(page, xlogreader->readBuf, len);
+			memset(page + len, 0, XLOG_BLCKSZ - len);
+		}
 
 		XLogCtl->xlblocks[firstIdx] = pageBeginPtr + XLOG_BLCKSZ;
 		XLogCtl->InitializedUpTo = pageBeginPtr + XLOG_BLCKSZ;
diff --git a/src/backend/access/transam/xlogpmem.c b/src/backend/access/transam/xlogpmem.c
new file mode 100644
index 00000000000..5b50ba80a7a
--- /dev/null
+++ b/src/backend/access/transam/xlogpmem.c
@@ -0,0 +1,297 @@
+#include "postgres.h"
+
+#ifdef USE_LIBPMEM
+
+#include <errno.h>
+#include <limits.h>		/* INT_MAX */
+#include <stddef.h>		/* size_t */
+#include <stdint.h>		/* uintptr_t */
+#include <unistd.h>		/* getpid, unlink */
+
+#include <libpmem.h>
+
+#include "c.h"						/* bool, Size */
+#include "access/xlog.h"
+#include "access/xlog_internal.h"	/* XLogFilePath, XLByteToSeg */
+#include "access/xlogpmem.h"
+#include "common/file_perm.h"		/* pg_file_create_mode */
+#include "miscadmin.h"				/* enableFsync */
+#include "pgstat.h"
+
+static char *mappedPages = NULL;
+static XLogSegNo mappedSegNo = 0;
+
+#define PG_DAX_HUGEPAGE_SIZE (((uintptr_t) 1) << 21)
+#define PG_DAX_HUGEPAGE_MASK (~(PG_DAX_HUGEPAGE_SIZE - 1))
+
+static XLogSegNo PmemXLogMap(XLogSegNo segno, TimeLineID tli);
+static void PmemXLogCreate(XLogSegNo segno, TimeLineID tli);
+static void PmemXLogUnmap(void);
+
+static void *PmemCreateMapFile(const char *path, size_t len);
+static void *PmemOpenMapFile(const char *path, size_t expected_len);
+static void *PmemTryOpenMapFile(const char *path, size_t expected_len);
+static void *PmemMapFile(const char *path, size_t expected_len, int flags,
+						 bool try_open);
+static void PmemUnmapForError(void *addr, size_t len);
+
+/*
+ * Ensures that the WAL segment containing {ptr-1} is mapped.
+ *
+ * Returns mapped XLogSegNo.
+ */
+XLogSegNo
+PmemXLogEnsurePrevMapped(XLogRecPtr ptr, TimeLineID tli)
+{
+	XLogSegNo	segno;
+
+	Assert(wal_pmem_map);
+
+	XLByteToPrevSeg(ptr, segno, wal_segment_size);
+
+	if (mappedPages != NULL)
+	{
+		/* Fast return: The segment we need is already mapped */
+		if (mappedSegNo == segno)
+			return mappedSegNo;
+
+		/* Unmap the current segment we don't need */
+		PmemXLogUnmap();
+	}
+
+	return PmemXLogMap(segno, tli);
+}
+
+/*
+ * Creates a new XLOG file segment, or open a pre-existing one, for WAL buffers.
+ *
+ * Returns mapped XLogSegNo.
+ *
+ * See also XLogFileInit in xlog.c.
+ */
+static XLogSegNo
+PmemXLogMap(XLogSegNo segno, TimeLineID tli)
+{
+	char		path[MAXPGPATH];
+
+	Assert(mappedPages == NULL);
+
+	XLogFilePath(path, tli, segno, wal_segment_size);
+
+	/* PmemTryOpenMapFile will handle error except ENOENT */
+	mappedPages = PmemTryOpenMapFile(path, wal_segment_size);
+
+	/* Fast return if already exists */
+	if (mappedPages != NULL)
+	{
+		mappedSegNo = segno;
+		return mappedSegNo;
+	}
+
+	elog(DEBUG2, "creating and filling new WAL file");
+	PmemXLogCreate(segno, tli);
+
+	/* PmemOpenMapFile will handle error */
+	mappedPages = PmemOpenMapFile(path, wal_segment_size);
+	mappedSegNo = segno;
+
+	elog(DEBUG2, "done creating and filling new WAL file");
+	return mappedSegNo;
+}
+
+/*
+ * Creates a new XLOG file segment.
+ *
+ * See also XLogFileInit in xlog.c.
+ */
+static void
+PmemXLogCreate(XLogSegNo segno, TimeLineID tli)
+{
+	char	   *addr;
+	char		tmppath[MAXPGPATH];
+	XLogSegNo	inst_segno;
+	XLogSegNo	max_segno;
+
+	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
+	unlink(tmppath);
+
+	/* PmemCreateMapFile will handle error */
+	addr = PmemCreateMapFile(tmppath, wal_segment_size);
+
+	/*
+	 * Initialize whole the buffers.
+	 *
+	 * Note that we don't put any single byte if not wal_init_zero. It's okay
+	 * because we already have a new segment file truncated to the proper size.
+	 */
+	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
+	if (wal_init_zero)
+		pmem_memset_nodrain(addr, 0, wal_segment_size);
+	pgstat_report_wait_end();
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
+	if (enableFsync)
+		pmem_drain();
+	pgstat_report_wait_end();
+
+	if (pmem_unmap(addr, wal_segment_size) < 0)
+		elog(ERROR, "could not pmem_unmap temporal WAL buffers: %m");
+
+	inst_segno = segno;
+	max_segno = segno + CheckPointSegments;
+	if (!InstallXLogFileSegment(&inst_segno, tmppath, true, max_segno, tli))
+		unlink(tmppath);
+}
+
+/*
+ * Unmaps the current WAL segment file if mapped.
+ */
+static void
+PmemXLogUnmap(void)
+{
+	/* Fast return if not mapped */
+	if (mappedPages == NULL)
+		return;
+
+	if (pmem_unmap(mappedPages, wal_segment_size) < 0)
+		elog(ERROR, "could not pmem_unmap WAL buffers: %m");
+
+	mappedPages = NULL;
+}
+
+/*
+ * Gets the head address of the WAL buffers.
+ */
+char *
+PmemXLogGetBufferPages(void)
+{
+	Assert(wal_pmem_map);
+	Assert(mappedPages != NULL);
+
+	return mappedPages;
+}
+
+/*
+ * Flushes records in the given range [start, end) within a single segment.
+ */
+void
+PmemXLogFlush(XLogRecPtr start, XLogRecPtr end)
+{
+	Size		off;
+
+	Assert(wal_pmem_map);
+	Assert(start < end);
+	Assert(mappedPages != NULL);
+	Assert(XLByteInSeg(start, mappedSegNo, wal_segment_size));
+	Assert(XLByteInPrevSeg(end, mappedSegNo, wal_segment_size));
+
+	off = XLogSegmentOffset(start, wal_segment_size);
+	pmem_flush(mappedPages + off, end - start);
+}
+
+/*
+ * Wait for cache-flush to finish.
+ */
+void
+PmemXLogSync(void)
+{
+	Assert(wal_pmem_map);
+
+	/* Fast return */
+	if (!enableFsync)
+		return;
+
+	pmem_drain();
+}
+
+/*
+ * Wrappers for pmem_map_file.
+ */
+static void *
+PmemCreateMapFile(const char *path, size_t len)
+{
+	return PmemMapFile(path, len, PMEM_FILE_CREATE | PMEM_FILE_EXCL, false);
+}
+
+static void *
+PmemOpenMapFile(const char *path, size_t expected_len)
+{
+	return PmemMapFile(path, expected_len, 0, false);
+}
+
+static void *
+PmemTryOpenMapFile(const char *path, size_t expected_len)
+{
+	return PmemMapFile(path, expected_len, 0, true);
+}
+
+static void *
+PmemMapFile(const char *path, size_t expected_len, int flags, bool try_open)
+{
+	size_t		param_len;
+	int			mode;
+	size_t		mapped_len;
+	int			is_pmem;
+	void	   *addr;
+
+	Assert(expected_len > 0);
+	Assert(expected_len <= INT_MAX);
+
+	param_len = (flags & PMEM_FILE_CREATE) ? expected_len : 0;
+	mode = (flags & PMEM_FILE_CREATE) ? pg_file_create_mode : 0;
+
+	mapped_len = 0;
+	is_pmem = 0;
+	addr = pmem_map_file(path, param_len, flags, mode, &mapped_len, &is_pmem);
+
+	if (addr == NULL)
+	{
+		if (try_open && errno == ENOENT)
+			return NULL;
+
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not pmem_map_file \"%s\": %m", path)));
+	}
+
+	if (mapped_len > INT_MAX)
+	{
+		PmemUnmapForError(addr, mapped_len);
+		elog(ERROR,
+			 "unexpected file size: path \"%s\" actual (greater than %d) expected %d",
+			 path, INT_MAX, (int) expected_len);
+	}
+
+	if (mapped_len != expected_len)
+	{
+		PmemUnmapForError(addr, mapped_len);
+		elog(ERROR,
+			 "unexpected file size: path \"%s\" actual %d expected %d",
+			 path, (int) mapped_len, (int) expected_len);
+	}
+
+	if (!is_pmem)
+	{
+		PmemUnmapForError(addr, mapped_len);
+		elog(ERROR, "file not on PMEM: path \"%s\"", path);
+	}
+
+	if ((uintptr_t) addr & ~PG_DAX_HUGEPAGE_MASK)
+		elog(WARNING,
+			 "file not mapped on DAX hugepage boundary: path \"%s\" addr %p",
+			 path, addr);
+
+	return addr;
+}
+
+static void
+PmemUnmapForError(void *addr, size_t len)
+{
+	int		saved_errno;
+
+	saved_errno = errno;
+	(void) pmem_unmap(addr, len);
+	errno = saved_errno;
+}
+
+#endif /* USE_LIBPMEM */
diff --git a/src/include/access/xlogpmem.h b/src/include/access/xlogpmem.h
new file mode 100644
index 00000000000..3978640b82f
--- /dev/null
+++ b/src/include/access/xlogpmem.h
@@ -0,0 +1,59 @@
+/*
+ * xlogpmem.h
+ *
+ * Definitions for PMEM-mapped WAL buffers.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/xlogpmem.h
+ */
+#ifndef XLOGPMEM_H
+#define XLOGPMEM_H
+
+#include "postgres.h"
+
+#include "c.h"					/* Size */
+#include "access/xlogdefs.h"	/* XLogRecPtr, XLogSegNo */
+
+#ifdef USE_LIBPMEM
+
+/* Prototypes */
+extern XLogSegNo PmemXLogEnsurePrevMapped(XLogRecPtr ptr, TimeLineID tli);
+extern char *PmemXLogGetBufferPages(void);
+extern void PmemXLogFlush(XLogRecPtr start, XLogRecPtr end);
+extern void PmemXLogSync(void);
+
+#else /* USE_LIBPMEM */
+
+#include <stdlib.h> /* abort */
+
+static inline XLogSegNo
+PmemXLogEnsurePrevMapped(XLogRecPtr ptr, tli)
+{
+	abort();
+	return 0;
+}
+
+static inline char *
+PmemXLogGetBufferPages(void)
+{
+	abort();
+	return NULL;
+}
+
+static inline void
+PmemXLogFlush(XLogRecPtr start, XLogRecPtr end)
+{
+	abort();
+}
+
+static inline void
+PmemXLogSync(void)
+{
+	abort();
+}
+
+#endif /* USE_LIBPMEM */
+
+#endif							/* XLOGPMEM_H */
-- 
2.17.1

>From 90ea943904793c5212cd1fed450e4f3b1f97f8a7 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Wed, 5 Jan 2022 21:28:51 -0600
Subject: [PATCH 06/13] compile-fix-without-pmem

---
 src/include/access/xlogpmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/access/xlogpmem.h b/src/include/access/xlogpmem.h
index 3978640b82f..cd83bffc883 100644
--- a/src/include/access/xlogpmem.h
+++ b/src/include/access/xlogpmem.h
@@ -29,7 +29,7 @@ extern void PmemXLogSync(void);
 #include <stdlib.h> /* abort */
 
 static inline XLogSegNo
-PmemXLogEnsurePrevMapped(XLogRecPtr ptr, tli)
+PmemXLogEnsurePrevMapped(XLogRecPtr ptr, TimeLineID tli)
 {
 	abort();
 	return 0;
-- 
2.17.1

>From 9b8091780f0156b35297beaecc5868a2df835e80 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Fri, 5 Nov 2021 14:16:25 +0900
Subject: [PATCH 07/13] Compatible to Windows

---
 src/backend/access/transam/xlogpmem.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/backend/access/transam/xlogpmem.c b/src/backend/access/transam/xlogpmem.c
index 5b50ba80a7a..8bd990f1cd0 100644
--- a/src/backend/access/transam/xlogpmem.c
+++ b/src/backend/access/transam/xlogpmem.c
@@ -8,7 +8,24 @@
 #include <stdint.h>		/* uintptr_t */
 #include <unistd.h>		/* getpid, unlink */
 
+/*
+ * On Windows, we will have two ported but conflicting mode_t:
+ *
+ * mode_t in libpmem:
+ *     libpmem.h -> pmemcompat.h -> typedef int mode_t
+ * mode_t in PostgreSQL:
+ *     c.h -> port.h -> win32_port.h -> typedef unsigned short mode_t
+ *
+ * We want to use PostgreSQL's definition, so conceal libpmem's.
+ */
+#if defined(WIN32) && !defined(__CYGWIN__)
+#define mode_t unused_libpmem_mode_t
+#include <libpmem.h>
+#undef mode_t
+/* On other platforms, simply include libpmem.h */
+#else
 #include <libpmem.h>
+#endif
 
 #include "c.h"						/* bool, Size */
 #include "access/xlog.h"
@@ -242,7 +259,11 @@ PmemMapFile(const char *path, size_t expected_len, int flags, bool try_open)
 
 	mapped_len = 0;
 	is_pmem = 0;
+#if defined(WIN32) && !defined(__CYGWIN__)
+	addr = pmem_map_fileU(path, param_len, flags, mode, &mapped_len, &is_pmem);
+#else
 	addr = pmem_map_file(path, param_len, flags, mode, &mapped_len, &is_pmem);
+#endif
 
 	if (addr == NULL)
 	{
-- 
2.17.1

>From 35c77c59d2e4aec95d6c1489b052d4456855f72f Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Wed, 19 May 2021 11:57:49 +0900
Subject: [PATCH 08/13] WAL statistics in cases of wal_pmem_map=true

---
 src/backend/access/transam/xlogpmem.c | 47 +++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/backend/access/transam/xlogpmem.c b/src/backend/access/transam/xlogpmem.c
index 8bd990f1cd0..d91fb2175b7 100644
--- a/src/backend/access/transam/xlogpmem.c
+++ b/src/backend/access/transam/xlogpmem.c
@@ -195,6 +195,7 @@ void
 PmemXLogFlush(XLogRecPtr start, XLogRecPtr end)
 {
 	Size		off;
+	instr_time	start_time;
 
 	Assert(wal_pmem_map);
 	Assert(start < end);
@@ -203,22 +204,68 @@ PmemXLogFlush(XLogRecPtr start, XLogRecPtr end)
 	Assert(XLByteInPrevSeg(end, mappedSegNo, wal_segment_size));
 
 	off = XLogSegmentOffset(start, wal_segment_size);
+
+	/* Measure I/O timing to write WAL data */
+	if (track_wal_io_timing)
+		INSTR_TIME_SET_CURRENT(start_time);
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 	pmem_flush(mappedPages + off, end - start);
+	pgstat_report_wait_end();
+
+	/*
+	 * Increment the I/O timing and the number of times WAL data
+	 * were written out to disk.
+	 */
+	if (track_wal_io_timing)
+	{
+		instr_time	duration;
+
+		INSTR_TIME_SET_CURRENT(duration);
+		INSTR_TIME_SUBTRACT(duration, start_time);
+		WalStats.m_wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
+	}
+
+	WalStats.m_wal_write++;
 }
 
 /*
  * Wait for cache-flush to finish.
+ *
+ * See also issue_xlog_fsync in xlog.c.
  */
 void
 PmemXLogSync(void)
 {
+	instr_time	start;
+
 	Assert(wal_pmem_map);
 
 	/* Fast return */
 	if (!enableFsync)
 		return;
 
+	/* Measure I/O timing to sync the WAL file */
+	if (track_wal_io_timing)
+		INSTR_TIME_SET_CURRENT(start);
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
 	pmem_drain();
+	pgstat_report_wait_end();
+
+	/*
+	 * Increment the I/O timing and the number of times WAL files were synced.
+	 */
+	if (track_wal_io_timing)
+	{
+		instr_time	duration;
+
+		INSTR_TIME_SET_CURRENT(duration);
+		INSTR_TIME_SUBTRACT(duration, start);
+		WalStats.m_wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
+	}
+
+	WalStats.m_wal_sync++;
 }
 
 /*
-- 
2.17.1

>From 557ae7c84ea24bc6c7c42104144c422b0154d166 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Wed, 13 Oct 2021 11:10:17 +0900
Subject: [PATCH 09/13] Let wal_pmem_map be constant unless --with-libpmem

---
 src/backend/access/transam/xlog.c | 3 +++
 src/include/access/xlog.h         | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index be56599f9fd..62f08cb50bb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -116,7 +116,10 @@ int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
 int			wal_retrieve_retry_interval = 5000;
 int			max_slot_wal_keep_size_mb = -1;
 bool		track_wal_io_timing = false;
+
+#ifdef USE_LIBPMEM
 bool		wal_pmem_map = false;
+#endif
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 73900cbc9e7..ab3eb3887b9 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -88,7 +88,12 @@ extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
 extern bool track_wal_io_timing;
+
+#ifdef USE_LIBPMEM
 extern bool wal_pmem_map;
+#else
+#define wal_pmem_map false
+#endif
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
-- 
2.17.1

>From 133e1281d4b308df6f63edebae94cf1856598af4 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Thu, 28 Oct 2021 13:35:28 +0900
Subject: [PATCH 10/13] Ensure WAL mappings before assertion

---
 src/backend/access/transam/xlog.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 62f08cb50bb..f0d7a317d23 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1945,6 +1945,23 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
 	 */
 	if (ptr / XLOG_BLCKSZ == cachedPage)
 	{
+		/*
+		 * Ensure WAL mappings before assertion.
+		 *
+		 * cachedPos should be recalculated because it has probably been
+		 * invalidated due to WAL remapping. This should be done even if
+		 * openLogSegNo seems not to change because the address of the
+		 * mapping could have changed (ABA problem).
+		 */
+		if (wal_pmem_map)
+		{
+			endptr = ptr - ptr % XLOG_BLCKSZ + XLOG_BLCKSZ;
+			openLogSegNo = PmemXLogEnsurePrevMapped(endptr, tli);
+			cachedPos = PmemXLogGetBufferPages() +
+						(Size) XLogSegmentOffset(endptr - XLOG_BLCKSZ,
+												 wal_segment_size);
+		}
+
 		Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
 		Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
 		return cachedPos + ptr % XLOG_BLCKSZ;
-- 
2.17.1

>From f3a91d9198d6498c156b8078b77f0e4c401e224f Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Thu, 30 Sep 2021 12:51:56 +0900
Subject: [PATCH 11/13] Update document

---
 doc/src/sgml/config.sgml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 10aa18b7636..5e55564f42a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3239,6 +3239,33 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-wal-pmem-map" xreflabel="wal_pmem_map">
+      <term><varname>wal_pmem_map</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_pmem_map</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>on</literal>, this parameter causes WAL files to be
+        memory-mapped and used as WAL buffer pages.  The WAL files in the pg_wal
+        directory (or the directory given by the <command>initdb -X</command>
+        option) should be on <firstterm>persistent memory</firstterm> (PMEM) and
+        the filesystem for those files should support the <firstterm>Direct
+        Access</firstterm> (DAX) feature.  <varname>wal_sync_method</varname>
+        for the primary server is ignored and WAL updates are forced out to
+        PMEM in a more optimal way which avoids calling into the kernel.
+        <varname>min_wal_size</varname> should be a multiple of the size of a
+        WAL file, and <varname>wal_buffers</varname> is ignored and set to
+        the equivalent of <varname>min_wal_size</varname>.
+       </para>
+       <para>
+        This parameter can only be set at server start. The default is
+        <literal>off</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-writer-delay" xreflabel="wal_writer_delay">
       <term><varname>wal_writer_delay</varname> (<type>integer</type>)
       <indexterm>
-- 
2.17.1

>From b5d695e660b2dad66c96c5944d343ba0e825c7c3 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou...@hco.ntt.co.jp>
Date: Tue, 1 Jun 2021 19:29:22 +0900
Subject: [PATCH 12/13] Preallocate and initialize more WAL if
 wal_pmem_map=true

---
 src/backend/access/transam/xlog.c | 30 ++++++++++++++++++++++++------
 src/backend/utils/misc/guc.c      |  2 +-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f0d7a317d23..1196ae21e80 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2049,7 +2049,9 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
 	{
 		openLogTLI = tli;
 		openLogSegNo = PmemXLogEnsurePrevMapped(endptr, tli);
-		cachedPos = PmemXLogGetBufferPages() + idx * (Size) XLOG_BLCKSZ;
+		cachedPos = PmemXLogGetBufferPages() +
+					(Size) XLogSegmentOffset(endptr - XLOG_BLCKSZ,
+											 wal_segment_size);
 	}
 	else
 		cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
@@ -2290,7 +2292,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 		{
 			openLogTLI = tli;
 			openLogSegNo = PmemXLogEnsurePrevMapped(NewPageEndPtr, tli);
-			NewPage = (XLogPageHeader) (PmemXLogGetBufferPages() + nextidx * (Size) XLOG_BLCKSZ);
+			NewPage = (XLogPageHeader)
+				(PmemXLogGetBufferPages() +
+				 (Size) XLogSegmentOffset(NewPageBeginPtr, wal_segment_size));
 		}
 		else
 			NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
@@ -5275,15 +5279,25 @@ XLOGShmemSize(void)
 	/*
 	 * If we use WAL segment files as WAL buffers, we don't use the given
 	 * value of wal_buffers. Instead, we set it to the value based on the
-	 * segment size and the page size. This should be done before calculating
+	 * min_wal_size and the page size. This should be done before calculating
 	 * the size of xlblocks array.
+	 *
+	 * TODO Do not allow changing min_wal_size by SIGHUP if wal_pmem_map=true.
+	 *
+	 * TODO Move validations to check_hook functions.
 	 */
 	if (wal_pmem_map)
 	{
 		int			npages;
 		char		buf[32];
 
-		npages = wal_segment_size / XLOG_BLCKSZ;
+		if (min_wal_size_mb % (wal_segment_size / (1024 * 1024)) != 0)
+			elog(PANIC, "min_wal_size should be multiple of wal_segment_size when wal_pmem_map=true");
+
+		if (min_wal_size_mb / (XLOG_BLCKSZ / 1024) > INT_MAX / 1024)
+			elog(PANIC, "too many wal buffer pages");
+
+		npages = min_wal_size_mb / (XLOG_BLCKSZ / 1024) * 1024;
 		snprintf(buf, sizeof(buf), "%d", (int) npages);
 		SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE);
 	}
@@ -8189,7 +8203,8 @@ StartupXLOG(void)
 			 */
 			openLogTLI = newTLI;
 			openLogSegNo = PmemXLogEnsurePrevMapped(EndOfLog, newTLI);
-			page = PmemXLogGetBufferPages() + firstIdx * (Size) XLOG_BLCKSZ;
+			page = PmemXLogGetBufferPages() +
+				   (Size) XLogSegmentOffset(pageBeginPtr, wal_segment_size);
 			len = EndOfLog % XLOG_BLCKSZ;
 			memset(page + len, 0, XLOG_BLCKSZ - len);
 
@@ -8229,7 +8244,10 @@ StartupXLOG(void)
 	/*
 	 * Preallocate additional log files, if wanted.
 	 */
-	PreallocXlogFiles(EndOfLog, newTLI);
+	if (wal_pmem_map)
+		AdvanceXLInsertBuffer(InvalidXLogRecPtr, newTLI, true);
+	else
+		PreallocXlogFiles(EndOfLog, newTLI);
 
 	/*
 	 * Okay, we're officially UP.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee18a9cf338..6f667d5c43f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2866,7 +2866,7 @@ static struct config_int ConfigureNamesInt[] =
 			GUC_UNIT_XBLOCKS
 		},
 		&XLOGbuffers,
-		-1, -1, (INT_MAX / XLOG_BLCKSZ),
+		-1, -1, INT_MAX,
 		check_wal_buffers, NULL, NULL
 	},
 
-- 
2.17.1

Reply via email to