From b1ef3268b441d7661f5277e4aa89468d957a9f5d Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Wed, 20 Dec 2023 15:52:59 -0500
Subject: [PATCH v17] Add new pg_walsummary tool.

This can dump the contents of the WAL summary files found in
pg_wal/summaries. Normally, this shouldn't really be something anyone
needs to do, but it may be needed for debugging problems with
incremental backup, or could possibly be used in some useful way by
external tools.

XXX. Needs more tests.
---
 doc/src/sgml/ref/allfiles.sgml        |   1 +
 doc/src/sgml/ref/pg_walsummary.sgml   | 122 +++++++++++
 doc/src/sgml/reference.sgml           |   1 +
 src/bin/Makefile                      |   1 +
 src/bin/meson.build                   |   1 +
 src/bin/pg_walsummary/.gitignore      |   1 +
 src/bin/pg_walsummary/Makefile        |  48 +++++
 src/bin/pg_walsummary/meson.build     |  29 +++
 src/bin/pg_walsummary/nls.mk          |   6 +
 src/bin/pg_walsummary/pg_walsummary.c | 280 ++++++++++++++++++++++++++
 src/bin/pg_walsummary/t/001_basic.pl  |  19 ++
 src/tools/pgindent/typedefs.list      |   2 +
 12 files changed, 511 insertions(+)
 create mode 100644 doc/src/sgml/ref/pg_walsummary.sgml
 create mode 100644 src/bin/pg_walsummary/.gitignore
 create mode 100644 src/bin/pg_walsummary/Makefile
 create mode 100644 src/bin/pg_walsummary/meson.build
 create mode 100644 src/bin/pg_walsummary/nls.mk
 create mode 100644 src/bin/pg_walsummary/pg_walsummary.c
 create mode 100644 src/bin/pg_walsummary/t/001_basic.pl

diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index fda4690eab..4a42999b18 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -219,6 +219,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY pgtesttiming       SYSTEM "pgtesttiming.sgml">
 <!ENTITY pgupgrade          SYSTEM "pgupgrade.sgml">
 <!ENTITY pgwaldump          SYSTEM "pg_waldump.sgml">
+<!ENTITY pgwalsummary       SYSTEM "pg_walsummary.sgml">
 <!ENTITY postgres           SYSTEM "postgres-ref.sgml">
 <!ENTITY psqlRef            SYSTEM "psql-ref.sgml">
 <!ENTITY reindexdb          SYSTEM "reindexdb.sgml">
diff --git a/doc/src/sgml/ref/pg_walsummary.sgml b/doc/src/sgml/ref/pg_walsummary.sgml
new file mode 100644
index 0000000000..93e265ead7
--- /dev/null
+++ b/doc/src/sgml/ref/pg_walsummary.sgml
@@ -0,0 +1,122 @@
+<!--
+doc/src/sgml/ref/pg_walsummary.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="app-pgwalsummary">
+ <indexterm zone="app-pgwalsummary">
+  <primary>pg_walsummary</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle><application>pg_walsummary</application></refentrytitle>
+  <manvolnum>1</manvolnum>
+  <refmiscinfo>Application</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>pg_walsummary</refname>
+  <refpurpose>print contents of WAL summary files</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+  <cmdsynopsis>
+   <command>pg_walsummary</command>
+   <arg rep="repeat" choice="opt"><replaceable>option</replaceable></arg>
+   <arg rep="repeat"><replaceable>file</replaceable></arg>
+  </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+  <para>
+   <application>pg_walsummary</application> is used to print the contents of
+   WAL summary files. These binary files are found within the
+   <literal>pg_wal/summaries</literal> subdirectory of the data directory,
+   and can be converted to text using this tool. This is not ordinarily
+   necessary, since WAL summary files primarily exist to support
+   <link linkend="backup-incremental-backup">incremental backup</link>,
+   but it may be useful for debugging purposes.
+  </para>
+
+  <para>
+   A WAL summary file is indexed by tablespace OID, relation OID, and relation
+   fork. For each relation fork, it stores the list of blocks that were
+   modified by WAL within the range summarized in the file. It can also
+   store a "limit block," which is 0 if the relation fork was created or
+   truncated within the relevant WAL range, and otherwise the shortest length
+   to which the relation fork was truncated. If the relation fork was not
+   created, deleted, or truncated within the relevant WAL range, the limit
+   block is undefined or infinite and will not be printed by this tool.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Options</title>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term><option>-i</option></term>
+      <term><option>--individual</option></term>
+      <listitem>
+       <para>
+        By default, <application>pg_walsummary</application> prints one line of output
+        for each range of one or more consecutive modified blocks. This can
+        make the output a lot briefer, since a relation where all blocks from
+        0 through 999 were modified will produce only one line of output rather
+        than 1000 separate lines. This option requests a separate line of
+        output for every modified block.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-q</option></term>
+      <term><option>--quiet</option></term>
+      <listitem>
+       <para>
+        Do not print any output, except for errors. This can be useful
+        when you want to know whether a WAL summary file can be successfully
+        parsed but don't care about the contents.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+       <term><option>-?</option></term>
+       <term><option>--help</option></term>
+       <listitem>
+       <para>
+       Shows help about <application>pg_walsummary</application> command line
+       arguments, and exits.
+       </para>
+       </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Environment</title>
+
+  <para>
+   The environment variable <envar>PG_COLOR</envar> specifies whether to use
+   color in diagnostic messages. Possible values are
+   <literal>always</literal>, <literal>auto</literal> and
+   <literal>never</literal>.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="app-pgbasebackup"/></member>
+   <member><xref linkend="app-pgcombinebackup"/></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index a07d2b5e01..aa94f6adf6 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -289,6 +289,7 @@
    &pgtesttiming;
    &pgupgrade;
    &pgwaldump;
+   &pgwalsummary;
    &postgres;
 
  </reference>
diff --git a/src/bin/Makefile b/src/bin/Makefile
index aa2210925e..f98f58d39e 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -31,6 +31,7 @@ SUBDIRS = \
 	pg_upgrade \
 	pg_verifybackup \
 	pg_waldump \
+	pg_walsummary \
 	pgbench \
 	psql \
 	scripts
diff --git a/src/bin/meson.build b/src/bin/meson.build
index 4cb6fd59bb..d1e9ef4409 100644
--- a/src/bin/meson.build
+++ b/src/bin/meson.build
@@ -17,6 +17,7 @@ subdir('pg_test_timing')
 subdir('pg_upgrade')
 subdir('pg_verifybackup')
 subdir('pg_waldump')
+subdir('pg_walsummary')
 subdir('pgbench')
 subdir('pgevent')
 subdir('psql')
diff --git a/src/bin/pg_walsummary/.gitignore b/src/bin/pg_walsummary/.gitignore
new file mode 100644
index 0000000000..d71ec192fa
--- /dev/null
+++ b/src/bin/pg_walsummary/.gitignore
@@ -0,0 +1 @@
+pg_walsummary
diff --git a/src/bin/pg_walsummary/Makefile b/src/bin/pg_walsummary/Makefile
new file mode 100644
index 0000000000..2c24bc6db5
--- /dev/null
+++ b/src/bin/pg_walsummary/Makefile
@@ -0,0 +1,48 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/bin/pg_walsummary
+#
+# Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/bin/pg_walsummary/Makefile
+#
+#-------------------------------------------------------------------------
+
+PGFILEDESC = "pg_walsummary - print contents of WAL summary files"
+PGAPPICON=win32
+
+subdir = src/bin/pg_walsummary
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
+
+OBJS = \
+	$(WIN32RES) \
+	pg_walsummary.o
+
+all: pg_walsummary
+
+pg_walsummary: $(OBJS) | submake-libpgport submake-libpgfeutils
+	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+
+install: all installdirs
+	$(INSTALL_PROGRAM) pg_walsummary$(X) '$(DESTDIR)$(bindir)/pg_walsummary$(X)'
+
+installdirs:
+	$(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+	rm -f '$(DESTDIR)$(bindir)/pg_walsummary$(X)'
+
+clean distclean maintainer-clean:
+	rm -f pg_walsummary$(X) $(OBJS)
+
+check:
+	$(prove_check)
+
+installcheck:
+	$(prove_installcheck)
diff --git a/src/bin/pg_walsummary/meson.build b/src/bin/pg_walsummary/meson.build
new file mode 100644
index 0000000000..25cd56cda8
--- /dev/null
+++ b/src/bin/pg_walsummary/meson.build
@@ -0,0 +1,29 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+pg_walsummary_sources = files(
+  'pg_walsummary.c',
+)
+
+if host_system == 'windows'
+  pg_walsummary_sources += rc_bin_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'pg_walsummary',
+    '--FILEDESC', 'pg_walsummary - print contents of WAL summary files',])
+endif
+
+pg_walsummary = executable('pg_walsummary',
+  pg_walsummary_sources,
+  dependencies: [frontend_code],
+  kwargs: default_bin_args,
+)
+bin_targets += pg_walsummary
+
+tests += {
+  'name': 'pg_walsummary',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'tap': {
+    'tests': [
+      't/001_basic.pl',
+    ],
+  }
+}
diff --git a/src/bin/pg_walsummary/nls.mk b/src/bin/pg_walsummary/nls.mk
new file mode 100644
index 0000000000..f411dcfe9e
--- /dev/null
+++ b/src/bin/pg_walsummary/nls.mk
@@ -0,0 +1,6 @@
+# src/bin/pg_walsummary/nls.mk
+CATALOG_NAME     = pg_walsummary
+GETTEXT_FILES    = $(FRONTEND_COMMON_GETTEXT_FILES) \
+		   pg_walsummary.c
+GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS)
+GETTEXT_FLAGS    = $(FRONTEND_COMMON_GETTEXT_FLAGS)
diff --git a/src/bin/pg_walsummary/pg_walsummary.c b/src/bin/pg_walsummary/pg_walsummary.c
new file mode 100644
index 0000000000..0c0225eeb8
--- /dev/null
+++ b/src/bin/pg_walsummary/pg_walsummary.c
@@ -0,0 +1,280 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_walsummary.c
+ *		Prints the contents of WAL summary files.
+ *
+ * Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/bin/pg_walsummary/pg_walsummary.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <fcntl.h>
+#include <limits.h>
+
+#include "common/blkreftable.h"
+#include "common/logging.h"
+#include "fe_utils/option_utils.h"
+#include "lib/stringinfo.h"
+#include "getopt_long.h"
+
+typedef struct ws_options
+{
+	bool		individual;		/* print each block on its own line */
+	bool		quiet;			/* skip printing of block references */
+} ws_options;
+
+typedef struct ws_file_info
+{
+	int			fd;				/* open descriptor the read callback reads from */
+	char	   *filename;		/* file name, used in error messages */
+} ws_file_info;
+
+static BlockNumber *block_buffer = NULL;
+static unsigned block_buffer_size = 512;	/* Initial size. */
+
+static void dump_one_relation(ws_options *opt, RelFileLocator *rlocator,
+							  ForkNumber forknum, BlockNumber limit_block,
+							  BlockRefTableReader *reader);
+static void help(const char *progname);
+static int	compare_block_numbers(const void *a, const void *b);
+static int	walsummary_read_callback(void *callback_arg, void *data,
+									 int length);
+static void walsummary_error_callback(void *callback_arg, char *fmt,...) pg_attribute_printf(2, 3);
+
+/*
+ * Main program: parse the command-line options, then dump each WAL summary
+ * file named on the command line, in the order given.
+ */
+int
+main(int argc, char *argv[])
+{
+	static struct option long_options[] = {
+		{"individual", no_argument, NULL, 'i'},
+		{"quiet", no_argument, NULL, 'q'},
+		{NULL, 0, NULL, 0}
+	};
+
+	const char *progname;
+	int			optindex;
+	int			c;
+	ws_options	opt;
+
+	memset(&opt, 0, sizeof(ws_options));
+
+	pg_logging_init(argv[0]);
+	progname = get_progname(argv[0]);
+	handle_help_version_opts(argc, argv, progname, help);
+
+	/* Process command-line options; only -i and -q are recognized. */
+	while ((c = getopt_long(argc, argv, "iq", long_options, &optindex)) != -1)
+	{
+		switch (c)
+		{
+			case 'i':
+				opt.individual = true;
+				break;
+			case 'q':
+				opt.quiet = true;
+				break;
+			default:
+				/* getopt_long already emitted a complaint */
+				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
+				exit(1);
+		}
+	}
+
+	if (optind >= argc)
+	{
+		pg_log_error("no input files specified");
+		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
+		exit(1);
+	}
+
+	while (optind < argc)
+	{
+		ws_file_info ws;
+		BlockRefTableReader *reader;
+		RelFileLocator rlocator;
+		ForkNumber	forknum;
+		BlockNumber limit_block;
+
+		ws.filename = argv[optind++];
+		if ((ws.fd = open(ws.filename, O_RDONLY | PG_BINARY, 0)) < 0)
+			pg_fatal("could not open file \"%s\": %m", ws.filename);
+
+		reader = CreateBlockRefTableReader(walsummary_read_callback, &ws,
+										   ws.filename,
+										   walsummary_error_callback, NULL);
+		while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
+											   &limit_block))
+			dump_one_relation(&opt, &rlocator, forknum, limit_block, reader);
+
+		DestroyBlockRefTableReader(reader);
+		close(ws.fd);
+	}
+
+	exit(0);
+}
+
+/*
+ * Dump details for one relation fork: its limit block, if any, followed by
+ * the modified blocks. With --quiet, the data is still read but not printed.
+ */
+static void
+dump_one_relation(ws_options *opt, RelFileLocator *rlocator,
+				  ForkNumber forknum, BlockNumber limit_block,
+				  BlockRefTableReader *reader)
+{
+	unsigned	i = 0;
+	unsigned	nblocks;
+	BlockNumber startblock = InvalidBlockNumber;
+	BlockNumber endblock = InvalidBlockNumber;
+
+	/* Dump limit block, if any, unless --quiet was given. */
+	if (!opt->quiet && limit_block != InvalidBlockNumber)
+		printf("TS %u, DB %u, REL %u, FORK %s: limit %u\n",
+			   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
+			   forkNames[forknum], limit_block);
+
+	/* If we haven't allocated a block buffer yet, do that now. */
+	if (block_buffer == NULL)
+		block_buffer = palloc_array(BlockNumber, block_buffer_size);
+
+	/* Try to fill the block buffer. */
+	nblocks = BlockRefTableReaderGetBlocks(reader, block_buffer,
+										   block_buffer_size);
+
+	/* If we filled the block buffer completely, we must enlarge it. */
+	while (nblocks >= block_buffer_size)
+	{
+		unsigned	new_size;
+
+		/* Double the size, being careful about overflow. */
+		new_size = block_buffer_size * 2;
+		if (new_size < block_buffer_size)
+			new_size = PG_UINT32_MAX;
+		block_buffer = repalloc_array(block_buffer, BlockNumber, new_size);
+
+		/* Try to fill the newly-allocated space. */
+		nblocks +=
+			BlockRefTableReaderGetBlocks(reader,
+										 block_buffer + block_buffer_size,
+										 new_size - block_buffer_size);
+
+		/* Save the new size for later calls. */
+		block_buffer_size = new_size;
+	}
+
+	/* If we don't need to produce any output, skip the rest of this. */
+	if (opt->quiet)
+		return;
+
+	/*
+	 * Sort the returned block numbers. If the block reference table was using
+	 * the bitmap representation for a given chunk, the block numbers in that
+	 * chunk will already be sorted, but when the array-of-offsets
+	 * representation is used, we can receive block numbers here out of order.
+	 */
+	qsort(block_buffer, nblocks, sizeof(BlockNumber), compare_block_numbers);
+
+	/* Dump block references. */
+	while (i < nblocks)
+	{
+		/*
+		 * Find the next range of blocks to print, but if --individual was
+		 * specified, then consider each block a separate range.
+		 */
+		startblock = endblock = block_buffer[i++];
+		if (!opt->individual)
+		{
+			while (i < nblocks && block_buffer[i] == endblock + 1)
+			{
+				endblock++;
+				i++;
+			}
+		}
+
+		/* Format this range of block numbers as a string. */
+		if (startblock == endblock)
+			printf("TS %u, DB %u, REL %u, FORK %s: block %u\n",
+				   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
+				   forkNames[forknum], startblock);
+		else
+			printf("TS %u, DB %u, REL %u, FORK %s: blocks %u..%u\n",
+				   rlocator->spcOid, rlocator->dbOid, rlocator->relNumber,
+				   forkNames[forknum], startblock, endblock);
+	}
+}
+
+/*
+ * qsort() comparator for block numbers: returns -1, 0, or 1 according to
+ * whether the first is less than, equal to, or greater than the second.
+ */
+static int
+compare_block_numbers(const void *a, const void *b)
+{
+	BlockNumber lhs = *(const BlockNumber *) a;
+	BlockNumber rhs = *(const BlockNumber *) b;
+
+	if (lhs < rhs)
+		return -1;
+	if (lhs > rhs)
+		return 1;
+	return 0;
+}
+
+/*
+ * Error callback for the block reference table reader: logs the formatted
+ * message as an error and exits with status 1. callback_arg is unused.
+ */
+static void
+walsummary_error_callback(void *callback_arg, char *fmt,...)
+{
+	va_list		ap;
+
+	va_start(ap, fmt);
+	pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
+/*
+ * Read callback for the block reference table reader. callback_arg is the
+ * ws_file_info being read; returns read()'s byte count, or dies on error.
+ */
+static int
+walsummary_read_callback(void *callback_arg, void *data, int length)
+{
+	ws_file_info *ws = callback_arg;
+	int			rc;
+
+	if ((rc = read(ws->fd, data, length)) < 0)
+		pg_fatal("could not read file \"%s\": %m", ws->filename);
+	return rc;
+}
+
+/*
+ * help
+ *
+ * Prints the help page; progname is the name of the executed program,
+ * such as "pg_walsummary".
+ */
+static void
+help(const char *progname)
+{
+	printf(_("%s prints the contents of a WAL summary file.\n\n"), progname);
+	printf(_("Usage:\n"));
+	printf(_("  %s [OPTION]... FILE...\n"), progname);
+	printf(_("\nOptions:\n"));
+	printf(_("  -i, --individual          list block numbers individually, not as ranges\n"));
+	printf(_("  -q, --quiet               don't print anything, just parse the files\n"));
+	printf(_("  -V, --version             output version information, then exit\n"));
+	printf(_("  -?, --help                show this help, then exit\n"));
+
+	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
+}
diff --git a/src/bin/pg_walsummary/t/001_basic.pl b/src/bin/pg_walsummary/t/001_basic.pl
new file mode 100644
index 0000000000..10a232a150
--- /dev/null
+++ b/src/bin/pg_walsummary/t/001_basic.pl
@@ -0,0 +1,19 @@
+# Copyright (c) 2021-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Only option handling is checked here; no WAL summary file is read.
+
+program_help_ok('pg_walsummary');
+program_version_ok('pg_walsummary');
+program_options_handling_ok('pg_walsummary');
+
+command_fails_like(
+	['pg_walsummary'],
+	qr/no input files specified/,
+	'input files must be specified');
+
+done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e37ef9aa76..86e0a86503 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4035,3 +4035,5 @@ cb_tablespace_mapping
 manifest_data
 manifest_writer
 rfile
+ws_options
+ws_file_info
-- 
2.39.3 (Apple Git-145)

