On Tue, Jul 30, 2019 at 9:39 AM Jeevan Chalke <
jeevan.cha...@enterprisedb.com> wrote:

>
>
>
> I am almost done writing the patch for pg_combinebackup and will post soon.
>

Attached patch which implements the pg_combinebackup utility used to combine
full basebackup with one or more incremental backups.

I have tested it manually and it works for all best cases.

Let me know if you have any inputs/suggestions/review comments?

Thanks
-- 
Jeevan Chalke
Technical Architect, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 8d91f35..f3e90b6 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -200,6 +200,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY pgBasebackup       SYSTEM "pg_basebackup.sgml">
 <!ENTITY pgbench            SYSTEM "pgbench.sgml">
 <!ENTITY pgChecksums        SYSTEM "pg_checksums.sgml">
+<!ENTITY pgCombinebackup    SYSTEM "pg_combinebackup.sgml">
 <!ENTITY pgConfig           SYSTEM "pg_config-ref.sgml">
 <!ENTITY pgControldata      SYSTEM "pg_controldata.sgml">
 <!ENTITY pgCtl              SYSTEM "pg_ctl-ref.sgml">
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 00782e0..92d9d13 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -415,7 +415,7 @@ PostgreSQL documentation
         which are modified after this given LSN will be backed up. The file
         which has these partial blocks has .partial as an extension. Backup
         taken in this manner has to be combined with the full backup with the
-        <command>pg_combinebackup</command> utility.
+        <xref linkend="app-pgcombinebackup"/> utility.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/ref/pg_combinebackup.sgml b/doc/src/sgml/ref/pg_combinebackup.sgml
new file mode 100644
index 0000000..ed87931
--- /dev/null
+++ b/doc/src/sgml/ref/pg_combinebackup.sgml
@@ -0,0 +1,202 @@
+<!--
+doc/src/sgml/ref/pg_combinebackup.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="app-pgcombinebackup">
+ <indexterm zone="app-pgcombinebackup">
+  <primary>pg_combinebackup</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle><application>pg_combinebackup</application></refentrytitle>
+  <manvolnum>1</manvolnum>
+  <refmiscinfo>Application</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>pg_combinebackup</refname>
+  <refpurpose>create a synthetic backup from a full backup and one or more incremental backups</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+  <cmdsynopsis>
+   <command>pg_combinebackup</command>
+   <arg rep="repeat" choice="opt"><replaceable class="parameter">option</replaceable></arg>
+  </cmdsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1 id="r1-app-pg_combinebackup-1">
+  <title>Description</title>
+  <para>
+   <application>pg_combinebackup</application> combines one or more incremental
+   backups with the full base-backup to generate a synthetic backup.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Options</title>
+
+   <para>
+    The following command-line options are available:
+
+    <variablelist>
+     <varlistentry>
+      <term><option>-f <replaceable>directory</replaceable></option></term>
+      <term><option>--full-backup=<replaceable>directory</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the directory where the full backup is stored.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-i <replaceable>directory</replaceable></option></term>
+      <term><option>--incr-backup=<replaceable>directory</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the directory where the incremental backup is stored.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-o <replaceable>directory</replaceable></option></term>
+      <term><option>--output-backup=<replaceable>directory</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the output directory where the combined full synthetic backup
+        to be stored.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-n</option></term>
+      <term><option>--no-clean</option></term>
+      <listitem>
+       <para>
+        By default, when <command>pg_combinebackup</command> aborts with an
+        error, it removes the output data directories it might have created
+        before discovering that it cannot finish the job. This option inhibits
+        tidying-up and is thus useful for debugging.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-r <replaceable>max-retries</replaceable></option></term>
+      <listitem>
+       <para>
+        Max number of retries on copy command, with progressive wait.
+        Default is 3.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-s <replaceable>interval</replaceable></option></term>
+      <listitem>
+       <para>
+        Sleep interval before retry (in seconds).
+        Should be in between 1 and 60, default is 5.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-v</option></term>
+      <term><option>--verbose</option></term>
+      <listitem>
+       <para>
+        Enable verbose output. Lists all partial files processed and its
+        checksum status.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+       <term><option>-V</option></term>
+       <term><option>--version</option></term>
+       <listitem>
+       <para>
+        Print the <application>pg_combinebackup</application> version and exit.
+       </para>
+       </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-?</option></term>
+      <term><option>--help</option></term>
+       <listitem>
+        <para>
+         Show help about <application>pg_combinebackup</application> command line
+         arguments, and exit.
+        </para>
+       </listitem>
+      </varlistentry>
+    </variablelist>
+   </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Environment</title>
+  <variablelist>
+   <varlistentry>
+    <term><envar>PG_COLOR</envar></term>
+    <listitem>
+     <para>
+      Specifies whether to use color in diagnostics messages.  Possible values
+      are <literal>always</literal>, <literal>auto</literal>,
+      <literal>never</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+  <para>
+   Output directory, full backup directory, and at-least one incremental backup
+   directory must be specified.
+  </para>
+
+  <para>
+   <literal>PREVIOUS WAL LOCATION</literal> of the incremental backup must
+   match with the <literal>START WAL LOCATION</literal> of the previous full
+   or incremental backup in a given sequence.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To combine a full backup with two incremental backups and store it in the
+   output directory:
+<screen>
+<prompt>$</prompt> <userinput>pg_combinebackup -f /data/full/data -i /data/incr/data1 -i /data/incr/data2 -o /data/full/fulldata</userinput>
+</screen>
+  </para>
+
+  <para>
+   To combine a full backup with an incremental backups and store it in the
+   output directory along with verious options like, verbose, no-clean, and
+   maximum 3 retries: 
+<screen>
+<prompt>$</prompt> <userinput>pg_combinebackup -v --no-clean -r 3 -f /data/full/data -i /data/incr/data1 -o /data/full/fulldata</userinput>
+</screen>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="app-pgbasebackup"/></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index cef09dd..3513ab4 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -248,6 +248,7 @@
    &ecpgRef;
    &pgBasebackup;
    &pgbench;
+   &pgCombinebackup;
    &pgConfig;
    &pgDump;
    &pgDumpall;
diff --git a/src/bin/Makefile b/src/bin/Makefile
index 903e581..fc3cea4 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -18,6 +18,7 @@ SUBDIRS = \
 	pg_archivecleanup \
 	pg_basebackup \
 	pg_checksums \
+	pg_combinebackup \
 	pg_config \
 	pg_controldata \
 	pg_ctl \
diff --git a/src/bin/pg_combinebackup/Makefile b/src/bin/pg_combinebackup/Makefile
new file mode 100644
index 0000000..bdc9219
--- /dev/null
+++ b/src/bin/pg_combinebackup/Makefile
@@ -0,0 +1,42 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/bin/pg_combinebackup
+#
+# Copyright (c) 1998-2019, PostgreSQL Global Development Group
+#
+# src/bin/pg_combinebackup/Makefile
+#
+#-------------------------------------------------------------------------
+
+PGFILEDESC = "pg_combinebackup - combine full backup with incremental backups"
+PGAPPICON=win32
+
+subdir = src/bin/pg_combinebackup
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS= pg_combinebackup.o $(WIN32RES)
+
+all: pg_combinebackup
+
+pg_combinebackup: $(OBJS) | submake-libpgport
+	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+install: all installdirs
+	$(INSTALL_PROGRAM) pg_combinebackup$(X) '$(DESTDIR)$(bindir)/pg_combinebackup$(X)'
+
+installdirs:
+	$(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+	rm -f '$(DESTDIR)$(bindir)/pg_combinebackup$(X)'
+
+clean distclean maintainer-clean:
+	rm -f pg_combinebackup$(X) $(OBJS)
+	rm -rf tmp_check
+
+check:
+	$(prove_check)
+
+installcheck:
+	$(prove_installcheck)
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
new file mode 100644
index 0000000..0b1ab88
--- /dev/null
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -0,0 +1,938 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_combinebackup.c
+ *	  Combines one or more incremental backups with the full base-backup to
+ *	  generate new full base-backup.
+ *
+ * Copyright (c) 2010-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/bin/pg_combinebackup/pg_combinebackup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <dirent.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "access/xlog_internal.h"
+#include "common/controldata_utils.h"
+#include "common/file_perm.h"
+#include "common/logging.h"
+#include "getopt_long.h"
+#include "pg_getopt.h"
+
+
+/*
+ * Filename components.
+ *
+ * XXX: fd.h is not declared here as frontend side code is not able to
+ * interact with the backend-side definitions for the various fsync
+ * wrappers.
+ */
+#define PG_TEMP_FILES_DIR "pgsql_tmp"
+#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
+
+/* Max number of incremental backups to be combined. */
+#define MAX_INCR_BK_COUNT	10
+
+/* magic number in incremental backup's .partial file */
+#define INCREMENTAL_BACKUP_MAGIC	0x494E4352
+
+typedef struct
+{
+	FILE	   *fp;
+	char		filename[MAXPGPATH];
+	bool		isPartial;
+	int			dirIndex;
+} FileMap;
+
+typedef struct
+{
+	FILE	   *fp;
+	int			offset;
+} FileOffset;
+
+/* Defined in basebackup.c */
+typedef struct
+{
+	uint32		magic;
+	pg_crc32c	checksum;
+	uint32		nblocks;
+	uint32		blocknumbers[FLEXIBLE_ARRAY_MEMBER];
+} partial_file_header;
+
+static const char *progname;
+static ControlFileData *ControlFile;
+static bool verbose = false;
+static bool success = false;
+static bool noclean = false;
+static bool made_new_outputdata = false;
+static bool found_existing_outputdata = false;
+static bool checksum_failure = false;
+static char	*OutputDir = NULL;
+static int	maxretries = 3;		/* number of retries on copy command */
+static int  sleeptime = 5;		/* amount of time to sleep before retry */
+
+/* Function headers */
+static void usage(void);
+static void check_compatibility(char *datadir);
+static void verify_dir_is_empty_or_create(char *dirname, bool *created,
+										  bool *found);
+static void cleanup_directories_atexit(void);
+static void combine_partial_files(const char *fn, char **IncrDirs,
+								  int nIncrDir, const char *subdirpath,
+								  const char*outfn);
+static void copy_whole_file(const char *fromfn, const char *tofn);
+static void cleanup_filemaps(FileMap *filemaps, int nfilemaps);
+
+
+static void
+usage(void)
+{
+	printf(_("%s combines full backup with incremental backup.\n\n"), progname);
+	printf(_("Usage:\n"));
+	printf(_("  %s [OPTION]...\n"), progname);
+	printf(_("\nOptions:\n"));
+	printf(_("  -f, --full-backup=DIRECTORY full backup directory\n"));
+	printf(_("  -i, --incr-backup=DIRECTORY incremental backup directory (maximum %d)\n"), MAX_INCR_BK_COUNT);
+	printf(_("  -o, --output-dir=DIRECTORY  combine backup into directory\n"));
+	printf(_("\nGeneral options:\n"));
+	printf(_("  -n, --no-clean              do not clean up after errors\n"));
+	printf(_("  -r MAXRETRIES               max number of retries on copy command, with progressive wait\n"
+			 "                              (default=3)\n"));
+	printf(_("  -s SLEEPTIME                sleep interval before retry (in seconds)\n"
+			 "                              (min=1,max=60,default=5)\n"));
+	printf(_("  -v, --verbose               output verbose messages\n"));
+	printf(_("  -V, --version               output version information, then exit\n"));
+	printf(_("  -?, --help                  show this help, then exit\n"));
+	printf(_("\nReport bugs to <pgsql-b...@lists.postgresql.org>.\n"));
+}
+
+/*
+ * scan_file
+ *
+ * Checks whether given file is partial file or not.  If partial, then combines
+ * it into a full backup file, else copies as is to the output directory.
+ */
+static void
+scan_file(const char *fn, char **IncrDirs, int nIncrDir,
+		  const char *subdirpath)
+{
+	char		infn[MAXPGPATH];
+	char		outfn[MAXPGPATH];
+	char	   *extptr;
+	bool		isPartialFile = false;
+
+	extptr = strstr(fn, ".partial");
+	if (extptr != NULL)
+		isPartialFile = true;
+
+	if (subdirpath)
+	{
+		snprintf(infn, MAXPGPATH, "%s/%s/%s", IncrDirs[nIncrDir - 1],
+				 subdirpath, fn);
+		if (isPartialFile)
+			extptr[0] = '\0';
+		snprintf(outfn, MAXPGPATH, "%s/%s/%s", OutputDir, subdirpath, fn);
+	}
+	else
+	{
+		snprintf(infn, MAXPGPATH, "%s/%s", IncrDirs[nIncrDir - 1], fn);
+		if (isPartialFile)
+			extptr[0] = '\0';
+		snprintf(outfn, MAXPGPATH, "%s/%s", OutputDir, fn);
+	}
+
+	/* If .partial file, combine them, else copy it as is */
+	if (isPartialFile)
+	{
+		if (verbose)
+			pg_log_info("combining partial file \"%s.partial\"", fn);
+
+		combine_partial_files(fn, IncrDirs, nIncrDir, subdirpath, outfn);
+	}
+	else
+		copy_whole_file(infn, outfn);
+}
+
+/*
+ * copy_whole_file
+ *
+ * Copy file from source to its destination.
+ */
+static void
+copy_whole_file(const char *fromfn, const char *tofn)
+{
+	char		copycmd[MAXPGPATH * 2 + 10];
+	int			rc = 0;
+	int			numretries = 0;
+
+	/*
+	 * Create a copy command.  For windows we use "copy", and for all other
+	 * platforms we use "cp" command.
+	 */
+#ifdef WIN32
+	snprintf(copycmd, MAXPGPATH, "copy \"%s\" \"%s\"", fromfn, tofn);
+#else
+	snprintf(copycmd, MAXPGPATH, "cp \"%s\" \"%s\"", fromfn, tofn);
+#endif
+
+	/*
+	 * Try copying until we reach maxretries limit.  Before retrying, we sleep
+	 * for seconds specified by sleeptime.
+	 */
+	while (numretries <= maxretries)
+	{
+		rc = system(copycmd);
+		if (rc == 0)
+			return;
+
+		pg_log_info("could not copy, retrying after %d seconds",
+					sleeptime);
+		pg_usleep(numretries++ * sleeptime * 1000000L);
+	}
+
+	pg_log_error("could not copy file \"%s\" to \"%s\": %m", fromfn, tofn);
+	exit(1);
+}
+
+/*
+ * scan_directory
+ *
+ * Scan the input incremental directory and operates on each file.  Creates
+ * corresponding directories in the output directory too.
+ */
+static void
+scan_directory(char **IncrDirs, int nIncrDir, const char *subdirpath)
+{
+	char		path[MAXPGPATH];
+	DIR		   *dir;
+	struct dirent *de;
+
+	if (subdirpath)
+	{
+		char		outputpath[MAXPGPATH];
+
+		snprintf(path, sizeof(path), "%s/%s", IncrDirs[nIncrDir - 1],
+				 subdirpath);
+		snprintf(outputpath, sizeof(outputpath), "%s/%s", OutputDir,
+				 subdirpath);
+
+		/* Create this sub-directory in output directory */
+		if (pg_mkdir_p(outputpath, pg_dir_create_mode) == -1)
+		{
+			pg_log_error("could not create directory \"%s\": %m", outputpath);
+			exit(1);
+		}
+	}
+	else
+		snprintf(path, sizeof(path), "%s", IncrDirs[nIncrDir - 1]);
+
+	dir = opendir(path);
+	if (!dir)
+	{
+		pg_log_error("could not open directory \"%s\": %m", path);
+		exit(1);
+	}
+
+	while ((de = readdir(dir)) != NULL)
+	{
+		char		fn[MAXPGPATH];
+		struct stat st;
+
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
+			continue;
+
+		/* Skip temporary files */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILE_PREFIX,
+					strlen(PG_TEMP_FILE_PREFIX)) == 0)
+			continue;
+
+		/* Skip temporary folders */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILES_DIR,
+					strlen(PG_TEMP_FILES_DIR)) == 0)
+			continue;
+
+		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
+		if (lstat(fn, &st) < 0)
+		{
+			pg_log_error("could not stat file \"%s\": %m", fn);
+			exit(1);
+		}
+		if (S_ISREG(st.st_mode))
+			scan_file(de->d_name, IncrDirs, nIncrDir, subdirpath);
+#ifndef WIN32
+		else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
+#else
+		else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
+#endif
+		{
+			char		newsubdirpath[MAXPGPATH];
+
+			if (subdirpath)
+				snprintf(newsubdirpath, MAXPGPATH, "%s/%s", subdirpath,
+						 de->d_name);
+			else
+				snprintf(newsubdirpath, MAXPGPATH, "%s", de->d_name);
+
+			scan_directory(IncrDirs, nIncrDir, newsubdirpath);
+		}
+	}
+	closedir(dir);
+	return;
+}
+
+int
+main(int argc, char *argv[])
+{
+	static struct option long_options[] = {
+		{"full-backup", required_argument, NULL, 'f'},
+		{"incr-backup", required_argument, NULL, 'i'},
+		{"output-dir", required_argument, NULL, 'o'},
+		{"no-clean", no_argument, NULL, 'n'},
+		{"verbose", no_argument, NULL, 'v'},
+		{NULL, 0, NULL, 0}
+	};
+
+	char	   *IncrDirs[MAX_INCR_BK_COUNT];
+	int			nIncrDir;
+	int			c;
+	int			option_index;
+	int			i;
+	XLogRecPtr	startlsn = InvalidXLogRecPtr;
+	XLogRecPtr	prevlsn = InvalidXLogRecPtr;
+
+	pg_logging_init(argv[0]);
+	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_combinebackup"));
+	progname = get_progname(argv[0]);
+
+	if (argc > 1)
+	{
+		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+		{
+			usage();
+			exit(0);
+		}
+		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+		{
+			puts("pg_combinebackup (PostgreSQL) " PG_VERSION);
+			exit(0);
+		}
+	}
+
+	atexit(cleanup_directories_atexit);
+
+	/* Zero index is reserved for full backup directory. */
+	IncrDirs[0] = NULL;
+	nIncrDir = 1;
+	while ((c = getopt_long(argc, argv, "f:ni:o:r:s:v", long_options, &option_index)) != -1)
+	{
+		switch (c)
+		{
+			case 'f':
+				IncrDirs[0] = optarg;
+				break;
+			case 'n':
+				noclean = true;
+				break;
+			case 'i':
+				if (nIncrDir == MAX_INCR_BK_COUNT)
+				{
+					pg_log_error("too many incremental backups to combine");
+					fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+					exit(1);
+				}
+
+				IncrDirs[nIncrDir] = optarg;
+				nIncrDir++;
+				break;
+			case 'o':
+				OutputDir = optarg;
+				break;
+			case 'r':
+				maxretries = atoi(optarg);
+				if (maxretries < 0)
+				{
+					pg_log_error("invalid value for maxretries");
+					fprintf(stderr, _("%s: -r maxretries must be >= 0\n"), progname);
+					exit(1);
+				}
+				break;
+			case 's':
+				sleeptime = atoi(optarg);
+				if (sleeptime <= 0 || sleeptime > 60)
+				{
+					pg_log_error("invalid value for sleeptime");
+					fprintf(stderr, _("%s: -s sleeptime must be between 1 and 60\n"), progname);
+					exit(1);
+				}
+				break;
+			case 'v':
+				verbose = true;
+				break;
+			default:
+				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+				exit(1);
+		}
+	}
+
+	/*
+	 * Need to have directory paths for full backup, incremental backups, and
+	 * the output directory.  Error out if we don't get that.
+	 */
+	if (IncrDirs[0] == NULL)
+	{
+		pg_log_error("no full backup directory specified");
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+	if (nIncrDir == 1)
+	{
+		pg_log_error("no incremental backup directory specified");
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+	if (OutputDir == NULL)
+	{
+		pg_log_error("no target directory specified");
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+	else
+		verify_dir_is_empty_or_create(OutputDir, &made_new_outputdata,
+									  &found_existing_outputdata);
+
+	/* Complain if any arguments remain */
+	if (optind < argc)
+	{
+		pg_log_error("too many command-line arguments (first is \"%s\")",
+					 argv[optind]);
+		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+				progname);
+		exit(1);
+	}
+
+	/*
+	 * Verify the backup chain.  PREVIOUS WAL LOCATION of the incremental
+	 * backup must match with the START WAL LOCATION of the previous backup,
+	 * until we reach a full backup in which there is no PREVIOUS WAL LOCATION.
+	 */
+	for (i = (nIncrDir - 1); i >= 0; i--)
+	{
+		struct stat statbuf;
+		char		filename[MAXPGPATH];
+		FILE	   *fp;
+		char	   *labelfile;
+		char		startxlogfilename[MAXFNAMELEN];
+		uint32		hi;
+		uint32		lo;
+		char		ch;
+
+#define BACKUP_LABEL_FILE		"backup_label"
+
+		check_compatibility(IncrDirs[i]);
+
+		snprintf(filename, MAXPGPATH, "%s/%s", IncrDirs[i], BACKUP_LABEL_FILE);
+		if (stat(filename, &statbuf))
+		{
+			pg_log_error("could not stat file \"%s\": %m", filename);
+			exit(1);
+		}
+		fp = fopen(filename, "r");
+		if (fp == NULL)
+		{
+			pg_log_error("could not read file \"%s\": %m", filename);
+			exit(1);
+		}
+
+		labelfile = malloc(statbuf.st_size + 1);
+		if (fread(labelfile, 1, statbuf.st_size, fp) != statbuf.st_size)
+		{
+			pg_log_error("corrupted file \"%s\": %m", filename);
+			free(labelfile);
+			exit(1);
+		}
+
+		labelfile[statbuf.st_size] = '\0';
+
+		/*
+		 * Read the START WAL LOCATION from the directory, we skip this for top
+		 * most directory corresponding to the last incremental backup as it is
+		 * not needed to check.
+		 */
+		if (i != (nIncrDir - 1))
+		{
+			if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c",
+					   &hi, &lo, startxlogfilename,
+					   &ch) != 4 || ch != '\n')
+			{
+				pg_log_error("invalid data in file \"%s\": %m", filename);
+				free(labelfile);
+				exit(1);
+			}
+			startlsn = ((uint64) hi) << 32 | lo;
+
+			/*
+			 * We end up here from second loop counter, thus prevlsn must have
+			 * been already set.  Check that with startlsn fetched above, they
+			 * must match.  Otherwise we have a broken chain, bail out.
+			 */
+			Assert(prevlsn != InvalidXLogRecPtr);
+			if (prevlsn != startlsn)
+			{
+				pg_log_error("invalid backup chain");
+				free(labelfile);
+				exit(1);
+			}
+		}
+
+		/*
+		 * Fetch the PREVIOUS WAL LOCATION from the incremental backup
+		 * directory.  Index 0 is of full backup directory where we won't have
+		 * that, so we skip it.
+		 */
+		if (i != 0)
+		{
+			char	   *ptr = strstr(labelfile, "PREVIOUS WAL LOCATION:");
+
+			if (!ptr || sscanf(ptr, "PREVIOUS WAL LOCATION: %X/%X", &hi, &lo) != 2)
+			{
+				pg_log_error("invalid data in file \"%s\": %m", filename);
+				free(labelfile);
+				exit(1);
+			}
+			prevlsn = ((uint64) hi) << 32 | lo;
+		}
+
+		free(labelfile);
+		fclose(fp);
+	}
+
+	/* Scan whole directory and process all .partial files */
+	scan_directory(IncrDirs, nIncrDir, NULL);
+
+	success = true;
+	return 0;
+}
+
+/*
+ * check_compatibility
+ *
+ * Read the control file and check compatibility
+ */
+static void
+check_compatibility(char *datadir)
+{
+	bool		crc_ok;
+
+	ControlFile = get_controlfile(datadir, &crc_ok);
+	if (!crc_ok)
+	{
+		pg_log_error("pg_control CRC value is incorrect");
+		exit(1);
+	}
+
+	if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
+	{
+		pg_log_error("cluster is not compatible with this version of pg_combinebackup");
+		exit(1);
+	}
+
+	if (ControlFile->blcksz != BLCKSZ)
+	{
+		pg_log_error("database cluster is not compatible");
+		fprintf(stderr, _("The database cluster was initialized with block size %u, but pg_combinebackup was compiled with block size %u.\n"),
+				ControlFile->blcksz, BLCKSZ);
+		exit(1);
+	}
+
+	/* When backup was taken, the database should have been in clean state. */
+	if (ControlFile->state != DB_IN_PRODUCTION)
+	{
+		pg_log_error("cluster must be in production");
+		exit(1);
+	}
+}
+
+/*
+ * verify_dir_is_empty_or_create
+ *
+ * Verify that the given directory exists and is empty.  If it does not exists,
+ * it is created.  If it exists but is not empty, an error will be given and
+ * the process ended.
+ */
+static void
+verify_dir_is_empty_or_create(char *dirname, bool *created, bool *found)
+{
+	switch (pg_check_dir(dirname))
+	{
+		case 0:
+
+			/*
+			 * Does not exist, so create
+			 */
+			if (pg_mkdir_p(dirname, pg_dir_create_mode) == -1)
+			{
+				pg_log_error("could not create directory \"%s\": %m", dirname);
+				exit(1);
+			}
+			if (created)
+				*created = true;
+			return;
+		case 1:
+
+			/*
+			 * Exists, empty
+			 */
+			if (found)
+				*found = true;
+			return;
+		case 2:
+		case 3:
+		case 4:
+
+			/*
+			 * Exists, not empty
+			 */
+			pg_log_error("directory \"%s\" exists but is not empty", dirname);
+			exit(1);
+		case -1:
+
+			/*
+			 * Access problem
+			 */
+			pg_log_error("could not access directory \"%s\": %m", dirname);
+			exit(1);
+	}
+}
+
+static void
+cleanup_directories_atexit(void)
+{
+	if (success)
+		return;
+
+	if (!noclean && !checksum_failure)
+	{
+		if (made_new_outputdata)
+		{
+			pg_log_info("removing target data directory \"%s\"", OutputDir);
+			if (!rmtree(OutputDir, true))
+				pg_log_error("failed to remove data directory");
+		}
+		else if (found_existing_outputdata)
+		{
+			pg_log_info("removing contents of target data directory \"%s\"",
+						OutputDir);
+			if (!rmtree(OutputDir, false))
+				pg_log_error("failed to remove contents of data directory");
+		}
+	}
+	else
+	{
+		if ((made_new_outputdata || found_existing_outputdata) &&
+			!checksum_failure)
+			pg_log_info("target data directory \"%s\" not removed at user's request",
+						OutputDir);
+	}
+}
+
+/*
+ * combine_partial_files
+ *
+ * Combines one or more incremental backups with full backup.  The algorithm in
+ * this function works this way:
+ * 	1.	Work backward through the backup chain until we find a complete version
+ * 		of the file. We create a filemap in this process.
+ * 	2.	Loop over all the files within filemap, read the header and check the
+ * 		blocks modified, verify the CRC and create a blockmap.
+ * 	3.	Create a new file in output directory by writing all the blocks.
+ */
+static void
+combine_partial_files(const char *fn, char **IncrDirs, int nIncrDir,
+					  const char *subdirpath, const char *outfn)
+{
+	FILE	   *outfp;
+	FileOffset	outblocks[RELSEG_SIZE];
+	int			i;
+	FileMap	   *filemaps;
+	int			fmindex;
+	bool		basefilefound;
+	bool		modifiedblockfound;
+	uint32		lastblkno;
+	FileMap    *fm;
+	struct stat statbuf;
+	uint32		nblocks;
+
+	memset(outblocks, 0, sizeof(FileOffset) * RELSEG_SIZE);
+	filemaps = (FileMap *) malloc(sizeof(FileMap) * nIncrDir);
+
+	/*
+	 * Open all files from all incremental backup directories and create a file
+	 * map.
+	 */
+	basefilefound = false;
+	for (i = (nIncrDir - 1), fmindex = 0; i >= 0; i--, fmindex++)
+	{
+		fm = &filemaps[fmindex];
+
+		if (subdirpath)
+			snprintf(fm->filename, MAXPGPATH, "%s/%s/%s.partial", IncrDirs[i], subdirpath,
+					 fn);
+		else
+			snprintf(fm->filename, MAXPGPATH, "%s/%s.partial", IncrDirs[i], fn);
+
+		fm->fp = fopen(fm->filename, "rb");
+		if (fm->fp == NULL)
+		{
+			if (errno == ENOENT)
+			{
+				char *extptr = strstr(fm->filename, ".partial");
+
+				Assert (extptr != NULL);
+				extptr[0] = '\0';
+
+				/* Check without .partial */
+				fm->fp = fopen(fm->filename, "rb");
+				if (fm->fp != NULL)
+				{
+					fm->isPartial = false;
+					fm->dirIndex = i;
+					basefilefound = true;
+					/* We got a non-partial file, so no need to scan further */
+					break;
+				}
+			}
+
+			pg_log_error("could not open file \"%s\": %m", fm->filename);
+			free(filemaps);
+			exit(1);
+		}
+		else
+		{
+			fm->isPartial = true;
+			fm->dirIndex = i;
+		}
+	}
+
+	/* We must have found the base file. */
+	if (!basefilefound)
+	{
+		pg_log_error("could not find base file \"%s\": %m", fn);
+		free(filemaps);
+		exit(1);
+	}
+
+	/* Process all opened files. */
+	lastblkno = 0;
+	modifiedblockfound = false;
+	for (i = 0; i < fmindex; i++)
+	{
+		fm = &filemaps[i];
+		if (fstat(fileno(fm->fp), &statbuf) != 0)
+		{
+			pg_log_error("could not stat file \"%s\": %m", fm->filename);
+			free(filemaps);
+			exit(1);
+		}
+
+		if (fm->isPartial)
+		{
+			char	   *buf;
+			int			hsize;
+			int			k;
+			int			blkstartoffset;
+			int			blknumberssize;
+			uint32	   *blknumbers;
+			partial_file_header *pfh;
+			pg_crc32c	savedchecksum;
+
+			hsize = offsetof(partial_file_header, blocknumbers);
+			buf = (char *) malloc(hsize);
+
+			/* Read partial file header. */
+			if (fread(buf, 1, hsize, fm->fp) != hsize)
+			{
+				pg_log_error("corrupted partial file \"%s\": %m", fm->filename);
+				free(filemaps);
+				free(buf);
+				exit(1);
+			}
+
+			pfh = (partial_file_header *) buf;
+
+			/* Check magic */
+			if (pfh->magic != INCREMENTAL_BACKUP_MAGIC)
+			{
+				pg_log_error("corrupted partial file \"%s\", magic mismatch: %m", fm->filename);
+				free(filemaps);
+				free(buf);
+				exit(1);
+			}
+
+			blknumberssize = sizeof(uint32) * pfh->nblocks;
+			blknumbers = (uint32 *) malloc(blknumberssize);
+
+			/* Read all block numbers. */
+			if (fread((char *) blknumbers, 1, blknumberssize, fm->fp) != blknumberssize)
+			{
+				pg_log_error("corrupted partial file \"%s\": %m", fm->filename);
+				free(blknumbers);
+				free(buf);
+				free(filemaps);
+				exit(1);
+			}
+
+			/* Check CRC */
+			savedchecksum = pfh->checksum;
+			INIT_CRC32C(pfh->checksum);
+			COMP_CRC32C(pfh->checksum, pfh, hsize);
+			COMP_CRC32C(pfh->checksum, blknumbers, blknumberssize);
+			if (pfh->checksum != savedchecksum)
+			{
+				pg_log_error("corrupted partial file \"%s\", checksum mismatch: %m", fm->filename);
+				free(blknumbers);
+				free(filemaps);
+				free(buf);
+				exit(1);
+			}
+			else if (verbose)
+				pg_log_info("checksums verified in file \"%s\"", fm->filename);
+
+			blkstartoffset = hsize + blknumberssize;
+			for (k = 0; k < pfh->nblocks; k++)
+			{
+				uint32		blknum = blknumbers[k];
+
+				/*
+				 * Set this block pointer in outblock array.  We skip setting
+				 * it if already set as we are processing from latest file to
+				 * oldest file.  If same block is modified across multiple
+				 * incremental backup, then we use the latest one; skipping all
+				 * other.
+				 */
+				if (outblocks[blknum].fp == NULL)
+				{
+					outblocks[blknum].fp = fm->fp;
+					outblocks[blknum].offset = blkstartoffset + BLCKSZ * k;
+				}
+
+				modifiedblockfound = true;
+			}
+
+			/* Update last block number */
+			if (k != 0 && blknumbers[k - 1] > lastblkno)
+				lastblkno = (int) blknumbers[k - 1];
+		}
+	}
+
+	/* Read base file */
+	Assert(i == fmindex);
+
+	fm = &filemaps[fmindex];
+	if (fstat(fileno(fm->fp), &statbuf) != 0)
+	{
+		pg_log_error("could not stat file \"%s\": %m", fm->filename);
+		free(filemaps);
+		exit(1);
+	}
+
+	Assert(fm->isPartial == false && (statbuf.st_size % BLCKSZ) == 0);
+
+	/*
+	 * If after processing all .partial files, we end up with no blocks
+	 * modified, then simply copy the base file to the output directory and
+	 * we are done.
+	 */
+	if (!modifiedblockfound)
+	{
+		copy_whole_file(fm->filename, outfn);
+		cleanup_filemaps(filemaps, fmindex + 1);
+		return;
+	}
+
+	/* Write all blocks to the output file */
+
+	nblocks = statbuf.st_size / BLCKSZ;
+	if ((nblocks - 1) > lastblkno)
+		lastblkno = nblocks - 1;
+
+	outfp = fopen(outfn, "wb");
+	if (!outfp)
+	{
+		pg_log_error("could not create file \"%s\": %m", outfn);
+		cleanup_filemaps(filemaps, fmindex + 1);
+		exit(1);
+	}
+
+	for (i = 0; i <= lastblkno; i++)
+	{
+		char		blkdata[BLCKSZ];
+		FILE	   *infp;
+		int			offset;
+
+		/*
+		 * Read block by block from respective file.  If outblock has NULL
+		 * file pointer, then fetch that block from the base file.
+		 */
+		if (outblocks[i].fp != NULL)
+		{
+			infp = outblocks[i].fp;
+			offset = outblocks[i].offset;
+		}
+		else
+		{
+			infp = fm->fp;
+			offset = i * BLCKSZ;
+		}
+
+		if (fseek(infp, offset, SEEK_SET) == -1)
+		{
+			pg_log_error("could not fseek in file: %m");
+			cleanup_filemaps(filemaps, fmindex + 1);
+			exit(1);
+		}
+
+		if (fread(blkdata, 1, BLCKSZ, infp) != BLCKSZ)
+		{
+			pg_log_error("could not read from file \"%s\": %m", outfn);
+			cleanup_filemaps(filemaps, fmindex + 1);
+			exit(1);
+		}
+
+		/* Finally write one block to the output file */
+		if (fwrite(blkdata, 1, BLCKSZ, outfp) != BLCKSZ)
+		{
+			pg_log_error("could not write to file \"%s\": %m", outfn);
+			cleanup_filemaps(filemaps, fmindex + 1);
+			exit(1);
+		}
+	}
+
+	fclose(outfp);
+
+	cleanup_filemaps(filemaps, fmindex + 1);
+	return;
+}
+
+static void
+cleanup_filemaps(FileMap *filemaps, int nfilemaps)
+{
+	int			i;
+
+	for (i = 0; i < nfilemaps; i++)
+		fclose(filemaps[i].fp);
+
+	free(filemaps);
+}

Reply via email to