Hi hackers,

Currently Postgres has options for continuous WAL file archiving, which is quite often used along with a master-replica setup. OK, then the worst has happened and it's time to get your old master back and synchronize it with the new master (ex-replica) using pg_rewind. However, the required WAL files may already have been archived, and pg_rewind will fail. You can copy these files manually, but it is difficult to work out which ones you need. In any case, this complicates building a failover system with automatic failure recovery.

I think it would be a good idea to allow pg_rewind to look for a restore_command in the recovery.conf of the target data directory, or to accept it as a command line argument. Then pg_rewind can use it to get missing WAL files from the archive. I had a few talks with DBAs and came to the conclusion that this is a highly requested feature.

I prepared a proof-of-concept patch (please find it attached), which does exactly what I described above. I played with it a little and it seems to be working; the tests were updated accordingly to verify this archive retrieval functionality too.

The patch is relatively simple except for one part: if we want to parse recovery.conf (with all possible includes, etc.) and get restore_command, then we should use the guc-file.l parser, which is heavily tied to the backend, e.g. in its error reporting. So I copied it and made a frontend-safe version, guc-file-fe.l. Personally, I don't think it's a good idea, but nothing else came to mind. It is also possible to keep only one option -- passing restore_command as a command line argument.

What do you think?


--

Alexey Kondratov

Postgres Professional: https://www.postgrespro.com

Russian Postgres Company

diff --combined src/bin/pg_rewind/Makefile
index a22fef1352,2bcfcc61af..0000000000
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@@ -20,7 -20,6 +20,7 @@@ LDFLAGS_INTERNAL += $(libpq_pgport
  
  OBJS	= pg_rewind.o parsexlog.o xlogreader.o datapagemap.o timeline.o \
  	fetch.o file_ops.o copy_fetch.o libpq_fetch.o filemap.o logging.o \
 +	guc-file-fe.o \
  	$(WIN32RES)
  
  EXTRA_CLEAN = xlogreader.c
diff --combined src/bin/pg_rewind/RewindTest.pm
index 8dc39dbc05,1dce56d035..0000000000
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@@ -40,7 -40,6 +40,7 @@@ use Config
  use Exporter 'import';
  use File::Copy;
  use File::Path qw(rmtree);
 +use File::Glob;
  use IPC::Run qw(run);
  use PostgresNode;
  use TestLib;
@@@ -249,41 -248,6 +249,41 @@@ sub run_pg_rewin
  				"--no-sync"
  			],
  			'pg_rewind remote');
 +	}
 +	elsif ($test_mode eq "archive")
 +	{
 +
 +		# Do rewind using a local pgdata as source and
 +		# specified directory with target WALs archive.
 +		my $wals_archive_dir = "${TestLib::tmp_check}/master_wals_archive";
 +		my $test_master_datadir = $node_master->data_dir;
 +		my @wal_files = glob "$test_master_datadir/pg_wal/0000000*";
 +		my $restore_command;
 +
 +		rmtree($wals_archive_dir);
 +		mkdir($wals_archive_dir) or die;
 +
 +		# Move all old master WAL files to the archive.
 +		# Old master should be stopped at this point.
 +		foreach my $wal_file (@wal_files)
 +		{
 +			move($wal_file, "$wals_archive_dir/") or die;
 +		}
 +
 +		$restore_command = "cp $wals_archive_dir/\%f \%p";
 +
 +		# Stop the new master and be ready to perform the rewind.
 +		$node_standby->stop;
 +		command_ok(
 +			[
 +				'pg_rewind',
 +				"--debug",
 +				"--source-pgdata=$standby_pgdata",
 +				"--target-pgdata=$master_pgdata",
 +				"--no-sync",
 +				"-R", $restore_command
 +			],
 +			'pg_rewind archive');
  	}
  	else
  	{
diff --combined src/bin/pg_rewind/parsexlog.c
index 11a9c26cd2,40028471bf..0000000000
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@@ -12,7 -12,6 +12,7 @@@
  #include "postgres_fe.h"
  
  #include <unistd.h>
 +#include <sys/stat.h>
  
  #include "pg_rewind.h"
  #include "filemap.h"
@@@ -46,10 -45,7 +46,10 @@@ static char xlogfpath[MAXPGPATH]
  typedef struct XLogPageReadPrivate
  {
  	const char *datadir;
 +	const char *restoreCommand;
  	int			tliIndex;
 +	XLogRecPtr  oldrecptr;
 +	TimeLineID  oldtli;
  } XLogPageReadPrivate;
  
  static int SimpleXLogPageRead(XLogReaderState *xlogreader,
@@@ -57,10 -53,6 +57,10 @@@
  				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
  				   TimeLineID *pageTLI);
  
 +static bool RestoreArchivedWAL(const char *path, const char *xlogfname, 
 +					off_t expectedSize, const char *restoreCommand,
 +					const char *lastRestartPointFname);
 +
  /*
   * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
   * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
@@@ -68,19 -60,15 +68,19 @@@
   */
  void
  extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 -			   XLogRecPtr endpoint)
 +			   ControlFileData *targetCF, const char *restore_command)
  {
  	XLogRecord *record;
 +	XLogRecPtr endpoint = targetCF->checkPoint;
  	XLogReaderState *xlogreader;
  	char	   *errormsg;
  	XLogPageReadPrivate private;
  
  	private.datadir = datadir;
  	private.tliIndex = tliIndex;
 +	private.restoreCommand = restore_command;
 +	private.oldrecptr = targetCF->checkPointCopy.redo;
 +	private.oldtli = targetCF->checkPointCopy.ThisTimeLineID;
  	xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead,
  									&private);
  	if (xlogreader == NULL)
@@@ -166,9 -154,9 +166,9 @@@ readOneRecord(const char *datadir, XLog
   * Find the previous checkpoint preceding given WAL location.
   */
  void
 -findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 +findLastCheckpoint(const char *datadir, ControlFileData *targetCF, XLogRecPtr forkptr, int tliIndex,
  				   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
 -				   XLogRecPtr *lastchkptredo)
 +				   XLogRecPtr *lastchkptredo, const char *restoreCommand)
  {
  	/* Walk backwards, starting from the given record */
  	XLogRecord *record;
@@@ -193,9 -181,6 +193,9 @@@
  
  	private.datadir = datadir;
  	private.tliIndex = tliIndex;
 +	private.restoreCommand = restoreCommand;
 +	private.oldrecptr = targetCF->checkPointCopy.redo;
 +	private.oldtli = targetCF->checkPointCopy.ThisTimeLineID;
  	xlogreader = XLogReaderAllocate(WalSegSz, &SimpleXLogPageRead,
  									&private);
  	if (xlogreader == NULL)
@@@ -306,53 -291,9 +306,53 @@@ SimpleXLogPageRead(XLogReaderState *xlo
  
  		if (xlogreadfd < 0)
  		{
 -			printf(_("could not open file \"%s\": %s\n"), xlogfpath,
 +			bool  restore_ok;
 +			char  lastRestartPointFname[MAXFNAMELEN];
 +			XLogSegNo restartSegNo;
 +
 +			/*
 +			 * If we have no restore_command to execute, then exit.
 +			 */
 +			if (private->restoreCommand == NULL)
 +			{
 +				printf(_("could not open file \"%s\": %s\n"), xlogfpath,
  				   strerror(errno));
 -			return -1;
 +				return -1;
 +			}
 +
 +			XLByteToSeg(private->oldrecptr, restartSegNo, WalSegSz);
 +			XLogFileName(lastRestartPointFname, private->oldtli, restartSegNo,
 +						 WalSegSz);
 +
 +			/*
 +			 * We have a restore_command to execute, so try to retrieve the
 +			 * missing WAL file from the archive.
 +			 */
 +			restore_ok = RestoreArchivedWAL(private->datadir,
 +											xlogfname,
 +											WalSegSz,
 +											private->restoreCommand,
 +											lastRestartPointFname);
 +
 +			if (restore_ok)
 +			{
 +				xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
 +
 +				if (xlogreadfd < 0)
 +				{
 +					printf(_("could not open restored from archive file \"%s\": %s\n"), xlogfpath,
 +						   strerror(errno));
 +					return -1;
 +				}
 +				else
 +					pg_log(PG_DEBUG, "using restored from archive version of file \"%s\"\n", xlogfpath);
 +			}
 +			else
 +			{
 +				/* errno is not meaningful here: the restore command failed, not a libc call */
 +				printf(_("could not restore file \"%s\" from archive\n"), xlogfname);
 +				return -1;
 +			}
  		}
  	}
  
  
@@@ -468,116 -409,3 +468,116 @@@ extractPageInfo(XLogReaderState *record
  		process_block_change(forknum, rnode, blkno);
  	}
  }
 +
 +/*
 + * Attempt to retrieve the specified WAL file from off-line archival storage
 + * by running restoreCommand; return true if the file was restored.
 + *
 + * For fixed-size files, the caller may pass the expected size as an
 + * additional crosscheck on successful recovery. If the file size is not
 + * known, set expectedSize = 0.
 + *
 + * This is a frontend adaptation of the RestoreArchivedFile function
 + * from transam/xlogarchive.c.
 + */
 +static bool
 +RestoreArchivedWAL(const char *path, const char *xlogfname, off_t expectedSize,
 +				   const char *restoreCommand, const char *lastRestartPointFname)
 +{
 +	char		xlogpath[MAXPGPATH];
 +	char		xlogRestoreCmd[MAXPGPATH];
 +	char	   *dp;
 +	char	   *endp;
 +	const char *sp;
 +	int			rc;
 +	struct stat stat_buf;
 +
 +	snprintf(xlogpath, MAXPGPATH, "%s/" XLOGDIR "/%s", path, xlogfname);
 +
 +	/*
 +	 * Construct the command to be executed, expanding %p, %f, %r and %%.
 +	 */
 +	dp = xlogRestoreCmd;
 +	endp = xlogRestoreCmd + MAXPGPATH - 1;
 +	*endp = '\0';
 +
 +	for (sp = restoreCommand; *sp; sp++)
 +	{
 +		if (*sp == '%')
 +		{
 +			switch (sp[1])
 +			{
 +				case 'p':
 +					/* %p: path the restored file must be written to */
 +					sp++;
 +					StrNCpy(dp, xlogpath, endp - dp);
 +					make_native_path(dp);
 +					dp += strlen(dp);
 +					break;
 +				case 'f':
 +					/* %f: filename of desired file */
 +					sp++;
 +					StrNCpy(dp, xlogfname, endp - dp);
 +					dp += strlen(dp);
 +					break;
 +				case 'r':
 +					/* %r: filename of last restartpoint */
 +					sp++;
 +					StrNCpy(dp, lastRestartPointFname, endp - dp);
 +					dp += strlen(dp);
 +					break;
 +				case '%':
 +					/* convert %% to a single % */
 +					sp++;
 +					if (dp < endp)
 +						*dp++ = *sp;
 +					break;
 +				default:
 +					/* otherwise treat the % as not special */
 +					if (dp < endp)
 +						*dp++ = *sp;
 +					break;
 +			}
 +		}
 +		else
 +		{
 +			if (dp < endp)
 +				*dp++ = *sp;
 +		}
 +	}
 +	*dp = '\0';
 +
 +	/*
 +	 * Execute restore_command, which should copy
 +	 * the missing WAL file from archival storage.
 +	 */
 +	rc = system(xlogRestoreCmd);
 +
 +	if (rc == 0)
 +	{
 +		/*
 +		 * Command apparently succeeded, but let's make sure the file is
 +		 * really there now and has the correct size.
 +		 */
 +		if (stat(xlogpath, &stat_buf) == 0)
 +		{
 +			if (expectedSize > 0 && stat_buf.st_size != expectedSize)
 +			{
 +				/* stat() succeeded, so errno carries no information here */
 +				printf(_("archive file \"%s\" has wrong size: %lu instead of %lu\n"),
 +					   xlogfname, (unsigned long) stat_buf.st_size, (unsigned long) expectedSize);
 +			}
 +			else
 +				return true;
 +		}
 +		else
 +		{
 +			/* Stat failed */
 +			printf(_("could not stat file \"%s\": %s\n"),
 +				   xlogpath,
 +				   strerror(errno));
 +		}
 +	}
 +
 +	return false;
 +}
diff --combined src/bin/pg_rewind/pg_rewind.c
index 3acac817eb,9653106386..0000000000
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@@ -19,13 -19,11 +19,13 @@@
  #include "file_ops.h"
  #include "filemap.h"
  #include "logging.h"
 +#include "guc-file-fe.h"
  
  #include "access/timeline.h"
  #include "access/xlog_internal.h"
  #include "catalog/catversion.h"
  #include "catalog/pg_control.h"
 +#include "common/controldata_utils.h"
  #include "common/file_perm.h"
  #include "common/file_utils.h"
  #include "common/restricted_token.h"
@@@ -54,13 -52,11 +54,13 @@@ int			WalSegSz
  char	   *datadir_target = NULL;
  char	   *datadir_source = NULL;
  char	   *connstr_source = NULL;
 +char	   *restore_command = NULL;
  
  bool		debug = false;
  bool		showprogress = false;
  bool		dry_run = false;
  bool		do_sync = true;
 +bool		restore_wals = false;
  
  /* Target history */
  TimeLineHistoryEntry *targetHistory;
@@@ -79,9 -75,6 +79,9 @@@ usage(const char *progname
  	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"));
  	printf(_("                                 safely to disk\n"));
  	printf(_("  -P, --progress                 write progress messages\n"));
 +	printf(_("  -r, --use-recovery-conf        use restore_command in the recovery.conf to\n"));
 +	printf(_("                                 retrieve WALs from archive\n"));
 +	printf(_("  -R, --restore-command=COMMAND  restore command\n"));
  	printf(_("      --debug                    write a lot of debug messages\n"));
  	printf(_("  -V, --version                  output version information, then exit\n"));
  	printf(_("  -?, --help                     show this help, then exit\n"));
@@@ -101,12 -94,9 +101,12 @@@ main(int argc, char **argv
  		{"dry-run", no_argument, NULL, 'n'},
  		{"no-sync", no_argument, NULL, 'N'},
  		{"progress", no_argument, NULL, 'P'},
 +		{"use-recovery-conf", no_argument, NULL, 'r'},
 +		{"restore-command", required_argument, NULL, 'R'},
  		{"debug", no_argument, NULL, 3},
  		{NULL, 0, NULL, 0}
  	};
 +	char		recfile_fullpath[MAXPGPATH];
  	int			option_index;
  	int			c;
  	XLogRecPtr	divergerec;
@@@ -139,7 -129,7 +139,7 @@@
  		}
  	}
  
 -	while ((c = getopt_long(argc, argv, "D:nNP", long_options, &option_index)) != -1)
 +	while ((c = getopt_long(argc, argv, "D:nNPR:r", long_options, &option_index)) != -1)
  	{
  		switch (c)
  		{
@@@ -150,10 -140,6 +150,10 @@@
  			case 'P':
  				showprogress = true;
  				break;
 +			
 +			case 'r':
 +				restore_wals = true;
 +				break;
  
  			case 'n':
  				dry_run = true;
@@@ -171,10 -157,6 +171,10 @@@
  				datadir_target = pg_strdup(optarg);
  				break;
  
 +			case 'R':
 +				restore_command = pg_strdup(optarg);
 +				break;
 +
  			case 1:				/* --source-pgdata */
  				datadir_source = pg_strdup(optarg);
  				break;
@@@ -241,71 -223,6 +241,71 @@@
  
  	umask(pg_mode_mask);
  
 +	if (restore_command != NULL)
 +	{
 +		pg_log(PG_DEBUG, "using command line restore_command=\'%s\'.\n", restore_command);
 +	}
 +	else if (restore_wals)
 +	{
 +		FILE	   *recovery_conf_file;
 +
 +		/*
 +		 * Look for recovery.conf in the target data directory and
 +		 * try to get restore_command from there.
 +		 */
 +		snprintf(recfile_fullpath, sizeof(recfile_fullpath), "%s/%s", datadir_target, RECOVERY_COMMAND_FILE);
 +		recovery_conf_file = fopen(recfile_fullpath, "r");
 +
 +		if (recovery_conf_file == NULL)
 +		{
 +			fprintf(stderr, _("%s: option -r/--use-recovery-conf is specified, but recovery.conf is absent in the target directory\n"),
 +					progname);
 +			fprintf(stderr, _("You have to add recovery.conf or pass restore_command with -R/--restore-command option.\n"));
 +			exit(1);
 +		}
 +		else
 +		{
 +			ConfigVariable *item,
 +						   *head = NULL,
 +						   *tail = NULL;
 +			bool			config_is_parsed;
 +
 +			/*
 +			 * We pass a fullpath to the recovery.conf as calling_file here, since
 +			 * parser will use its parent directory as base for all further includes
 +			 * if any exist.
 +			 */
 +			config_is_parsed = ParseConfigFile(RECOVERY_COMMAND_FILE, true,
 +											   recfile_fullpath, 0, 0,
 +											   PG_WARNING, &head, &tail);
 +			fclose(recovery_conf_file);
 +
 +			if (config_is_parsed)
 +			{
 +				for (item = head; item; item = item->next)
 +				{
 +					if (strcmp(item->name, "restore_command") == 0)
 +					{
 +						if (restore_command != NULL)
 +						{
 +							pfree(restore_command);
 +							restore_command = NULL;
 +						}
 +						restore_command = pstrdup(item->value);
 +						pg_log(PG_DEBUG, "using restore_command=\'%s\' from recovery.conf.\n", restore_command);
 +					}
 +				}
 +
 +				if (restore_command == NULL)
 +					pg_fatal("could not find restore_command in recovery.conf file %s\n", recfile_fullpath);
 +			}
 +			else
 +				pg_fatal("could not parse recovery.conf file %s\n", recfile_fullpath);
 +
 +			FreeConfigVariables(head);
 +		}
 +	}
 +
  	/* Connect to remote server */
  	if (connstr_source)
  		libpqConnect(connstr_source);
@@@ -377,9 -294,9 +377,9 @@@
  		exit(0);
  	}
  
 -	findLastCheckpoint(datadir_target, divergerec,
 +	findLastCheckpoint(datadir_target, &ControlFile_target, divergerec,
  					   lastcommontliIndex,
 -					   &chkptrec, &chkpttli, &chkptredo);
 +					   &chkptrec, &chkpttli, &chkptredo, restore_command);
  	printf(_("rewinding from last common checkpoint at %X/%X on timeline %u\n"),
  		   (uint32) (chkptrec >> 32), (uint32) chkptrec,
  		   chkpttli);
@@@ -402,7 -319,7 +402,7 @@@
  	 */
  	pg_log(PG_PROGRESS, "reading WAL in target\n");
  	extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
 -				   ControlFile_target.checkPoint);
 +				   &ControlFile_target, restore_command);
  	filemap_finalize();
  
  	if (showprogress)
diff --combined src/bin/pg_rewind/pg_rewind.h
index ba9d13e9bd,3f4ba7a267..0000000000
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@@ -14,7 -14,6 +14,7 @@@
  #include "datapagemap.h"
  
  #include "access/timeline.h"
 +#include "catalog/pg_control.h"
  #include "storage/block.h"
  #include "storage/relfilenode.h"
  
@@@ -33,10 -32,11 +33,10 @@@ extern int	targetNentries
  
  /* in parsexlog.c */
  extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
 -			   int tliIndex, XLogRecPtr endpoint);
 -extern void findLastCheckpoint(const char *datadir, XLogRecPtr searchptr,
 -				   int tliIndex,
 -				   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
 -				   XLogRecPtr *lastchkptredo);
 +			   int tliIndex, ControlFileData *targetCF, const char *restoreCommand);
 +extern void findLastCheckpoint(const char *datadir, ControlFileData *targetCF, XLogRecPtr searchptr,
 +				   int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
 +				   XLogRecPtr *lastchkptredo, const char *restoreCommand);
  extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
  			  int tliIndex);
  
diff --combined src/bin/pg_rewind/t/001_basic.pl
index 22777bff82,53dbf45be2..0000000000
--- a/src/bin/pg_rewind/t/001_basic.pl
+++ b/src/bin/pg_rewind/t/001_basic.pl
@@@ -1,7 -1,7 +1,7 @@@
  use strict;
  use warnings;
  use TestLib;
 -use Test::More tests => 10;
 +use Test::More tests => 15;
  
  use RewindTest;
  
@@@ -103,6 -103,5 +103,6 @@@ in master, before promotio
  # Run the test in both modes
  run_test('local');
  run_test('remote');
 +run_test('archive');
  
  exit(0);
diff --combined src/bin/pg_rewind/t/002_databases.pl
index 10c835efc1,2c9e427831..0000000000
--- a/src/bin/pg_rewind/t/002_databases.pl
+++ b/src/bin/pg_rewind/t/002_databases.pl
@@@ -1,7 -1,7 +1,7 @@@
  use strict;
  use warnings;
  use TestLib;
 -use Test::More tests => 6;
 +use Test::More tests => 9;
  
  use RewindTest;
  
@@@ -59,6 -59,5 +59,6 @@@ template
  # Run the test in both modes.
  run_test('local');
  run_test('remote');
 +run_test('archive');
  
  exit(0);
diff --combined src/bin/pg_rewind/t/003_extrafiles.pl
index f94ce329f4,496f38c457..0000000000
--- a/src/bin/pg_rewind/t/003_extrafiles.pl
+++ b/src/bin/pg_rewind/t/003_extrafiles.pl
@@@ -3,7 -3,7 +3,7 @@@
  use strict;
  use warnings;
  use TestLib;
 -use Test::More tests => 4;
 +use Test::More tests => 6;
  
  use File::Find;
  
@@@ -87,6 -87,5 +87,6 @@@ sub run_tes
  # Run the test in both modes.
  run_test('local');
  run_test('remote');
 +run_test('archive');
  
  exit(0);
diff --git a/src/bin/pg_rewind/guc-file-fe.h b/src/bin/pg_rewind/guc-file-fe.h
new file mode 100644
index 0000000000..cf480b806a
--- /dev/null
+++ b/src/bin/pg_rewind/guc-file-fe.h
@@ -0,0 +1,40 @@
+#ifndef PG_REWIND_GUC_FILE_FE_H
+#define PG_REWIND_GUC_FILE_FE_H
+
+#include "c.h"
+
+#define RECOVERY_COMMAND_FILE	"recovery.conf"
+
+/*
+ * Parsing the configuration file(s) will return a list of name-value pairs
+ * with source location info.  We also abuse this data structure to carry
+ * error reports about the config files.  An entry reporting an error will
+ * have errmsg != NULL, and might have NULLs for name, value, and/or filename.
+ *
+ * If "ignore" is true, don't attempt to apply the item (it might be an error
+ * report, or an item we determined to be duplicate).  "applied" is set true
+ * if we successfully applied, or could have applied, the setting.
+ */
+typedef struct ConfigVariable
+{
+	char	   *name;
+	char	   *value;
+	char	   *errmsg;
+	char	   *filename;
+	int			sourceline;
+	bool		ignore;
+	bool		applied;
+	struct ConfigVariable *next;
+} ConfigVariable;
+
+extern bool ParseConfigFile(const char *config_file, bool strict,
+				const char *calling_file, int calling_lineno,
+				int depth, int elevel,
+				ConfigVariable **head_p, ConfigVariable **tail_p);
+
+extern bool ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
+				ConfigVariable **head_p, ConfigVariable **tail_p);
+
+extern void FreeConfigVariables(ConfigVariable *list);
+
+#endif							/* PG_REWIND_GUC_FILE_FE_H */
diff --git a/src/bin/pg_rewind/guc-file-fe.l b/src/bin/pg_rewind/guc-file-fe.l
new file mode 100644
index 0000000000..13882191ba
--- /dev/null
+++ b/src/bin/pg_rewind/guc-file-fe.l
@@ -0,0 +1,774 @@
+/* -*-pgsql-c-*- */
+/*
+ * Configuration files parser for usage in frontend.
+ *
+ * Copyright (c) 2000-2018, PostgreSQL Global Development Group
+ *
+ * src/bin/pg_rewind/guc-file-fe.l
+ * Modified version of src/backend/utils/misc/guc-file.l
+ */
+
+%{
+
+#include <ctype.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "guc-file-fe.h"
+#include "logging.h"
+
+#include "common/fe_memutils.h"
+
+/*
+ * flex emits a yy_fatal_error() function that it calls in response to
+ * critical errors like malloc failure, file I/O errors, and detection of
+ * internal inconsistency.  That function prints a message and calls exit().
+ * Mutate it to instead call our handler, which jumps out of the parser.
+ */
+#undef fprintf
+#define fprintf(file, fmt, msg) GUC_flex_fatal(msg)
+
+enum
+{
+	GUC_ID = 1,
+	GUC_STRING = 2,
+	GUC_INTEGER = 3,
+	GUC_REAL = 4,
+	GUC_EQUALS = 5,
+	GUC_UNQUOTED_STRING = 6,
+	GUC_QUALIFIED_ID = 7,
+	GUC_EOL = 99,
+	GUC_ERROR = 100
+};
+
+static unsigned int ConfigFileLineno;
+static const char *GUC_flex_fatal_errmsg;
+static sigjmp_buf *GUC_flex_fatal_jmp;
+
+static void FreeConfigVariable(ConfigVariable *item);
+
+static void record_config_file_error(const char *errmsg,
+						 const char *config_file,
+						 int lineno,
+						 ConfigVariable **head_p,
+						 ConfigVariable **tail_p);
+
+static int	GUC_flex_fatal(const char *msg);
+static char *GUC_scanstr(const char *s);
+static bool ParseConfigDirectory(const char *includedir,
+					 const char *calling_file, int calling_lineno,
+					 int depth, int elevel,
+					 ConfigVariable **head_p,
+					 ConfigVariable **tail_p);
+
+/* LCOV_EXCL_START */
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="GUC_yy"
+
+
+SIGN			("-"|"+")
+DIGIT			[0-9]
+HEXDIGIT		[0-9a-fA-F]
+
+UNIT_LETTER		[a-zA-Z]
+
+INTEGER			{SIGN}?({DIGIT}+|0x{HEXDIGIT}+){UNIT_LETTER}*
+
+EXPONENT		[Ee]{SIGN}?{DIGIT}+
+REAL			{SIGN}?{DIGIT}*"."{DIGIT}*{EXPONENT}?
+
+LETTER			[A-Za-z_\200-\377]
+LETTER_OR_DIGIT [A-Za-z_0-9\200-\377]
+
+ID				{LETTER}{LETTER_OR_DIGIT}*
+QUALIFIED_ID	{ID}"."{ID}
+
+UNQUOTED_STRING {LETTER}({LETTER_OR_DIGIT}|[-._:/])*
+STRING			\'([^'\\\n]|\\.|\'\')*\'
+
+%%
+
+\n				ConfigFileLineno++; return GUC_EOL;
+[ \t\r]+		/* eat whitespace */
+#.*				/* eat comment (.* matches anything until newline) */
+
+{ID}			return GUC_ID;
+{QUALIFIED_ID}	return GUC_QUALIFIED_ID;
+{STRING}		return GUC_STRING;
+{UNQUOTED_STRING} return GUC_UNQUOTED_STRING;
+{INTEGER}		return GUC_INTEGER;
+{REAL}			return GUC_REAL;
+=				return GUC_EQUALS;
+
+.				return GUC_ERROR;
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * the bare comparison function for GUC names
+ */
+static int
+guc_name_compare(const char *namea, const char *nameb)
+{
+	/*
+	 * The temptation to use strcasecmp() here must be resisted, because the
+	 * array ordering has to remain stable across setlocale() calls. So, build
+	 * our own with a simple ASCII-only downcasing.
+	 */
+	while (*namea && *nameb)
+	{
+		char		cha = *namea++;
+		char		chb = *nameb++;
+
+		if (cha >= 'A' && cha <= 'Z')
+			cha += 'a' - 'A';
+		if (chb >= 'A' && chb <= 'Z')
+			chb += 'a' - 'A';
+		if (cha != chb)
+			return cha - chb;
+	}
+	if (*namea)
+		return 1;				/* a is longer */
+	if (*nameb)
+		return -1;				/* b is longer */
+	return 0;
+}
+
+/*
+ * Given a configuration file or directory location that may be a relative
+ * path, return an absolute one.  We consider the location to be relative to
+ * the directory holding the calling file.
+ */
+static char *
+AbsoluteConfigLocation(const char *location, const char *calling_file)
+{
+	char abs_path[MAXPGPATH];
+
+	if (is_absolute_path(location))
+		return pstrdup(location);
+	else
+	{
+		strlcpy(abs_path, calling_file, sizeof(abs_path));
+		get_parent_directory(abs_path);
+		join_path_components(abs_path, abs_path, location);
+		canonicalize_path(abs_path);
+
+		return pstrdup(abs_path);
+	}
+}
+
+/*
+ * Read and parse a single configuration file.  This function recurses
+ * to handle "include" directives.
+ *
+ * If "strict" is true, treat failure to open the config file as an error,
+ * otherwise just skip the file.
+ *
+ * calling_file/calling_lineno identify the source of the request.
+ * Pass NULL/0 if not recursing from an inclusion request.
+ *
+ * See ParseConfigFp for further details.  This one merely adds opening the
+ * config file rather than working from a caller-supplied file descriptor,
+ * and absolute-ifying the path name if necessary.
+ */
+bool
+ParseConfigFile(const char *config_file, bool strict,
+				const char *calling_file, int calling_lineno,
+				int depth, int elevel,
+				ConfigVariable **head_p,
+				ConfigVariable **tail_p)
+{
+	char	   *abs_path;
+	bool		OK = true;
+	FILE	   *fp;
+
+	/*
+	 * Reject too-deep include nesting depth.  This is just a safety check to
+	 * avoid dumping core due to stack overflow if an include file loops back
+	 * to itself.  The maximum nesting depth is pretty arbitrary.
+	 */
+	if (depth > 10)
+	{
+		pg_log(PG_WARNING, 
+				"could not open configuration file \"%s\": maximum nesting depth exceeded\n",
+				config_file);
+		record_config_file_error("nesting depth exceeded",
+								 calling_file, calling_lineno,
+								 head_p, tail_p);
+		return false;
+	}
+
+	abs_path = AbsoluteConfigLocation(config_file, calling_file);
+	fp = fopen(abs_path, "r");
+	if (!fp)
+	{
+		if (strict)
+		{
+			pg_log(PG_WARNING, 
+				"could not open configuration file \"%s\"\n",
+				abs_path);
+			record_config_file_error(psprintf("could not open file \"%s\"",
+											  abs_path),
+									 calling_file, calling_lineno,
+									 head_p, tail_p);
+			OK = false;
+		}
+		else
+		{
+			pg_log(PG_WARNING, 
+				"skipping missing configuration file \"%s\"\n",
+				abs_path);
+		}
+		goto cleanup;
+	}
+
+	OK = ParseConfigFp(fp, abs_path, depth, elevel, head_p, tail_p);
+
+cleanup:
+	if (fp)
+		fclose(fp);
+	pfree(abs_path);
+
+	return OK;
+}
+
+/*
+ * Capture an error message in the ConfigVariable list returned by
+ * config file parsing.
+ */
+static void
+record_config_file_error(const char *errmsg,
+						 const char *config_file,
+						 int lineno,
+						 ConfigVariable **head_p,
+						 ConfigVariable **tail_p)
+{
+	ConfigVariable *item;
+
+	item = palloc(sizeof *item);
+	item->name = NULL;
+	item->value = NULL;
+	item->errmsg = pstrdup(errmsg);
+	item->filename = config_file ? pstrdup(config_file) : NULL;
+	item->sourceline = lineno;
+	item->ignore = true;
+	item->applied = false;
+	item->next = NULL;
+	if (*head_p == NULL)
+		*head_p = item;
+	else
+		(*tail_p)->next = item;
+	*tail_p = item;
+}
+
+/*
+ * Flex fatal errors bring us here.  Stash the error message and jump back to
+ * ParseConfigFp().  Assume all msg arguments point to string constants; this
+ * holds for flex 2.5.31 (earliest we support) and flex 2.5.35 (latest as of
+ * this writing).  Otherwise, we would need to copy the message.
+ *
+ * We return "int" since this takes the place of calls to fprintf().
+*/
+static int
+GUC_flex_fatal(const char *msg)
+{
+	GUC_flex_fatal_errmsg = msg;
+	siglongjmp(*GUC_flex_fatal_jmp, 1);
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * Read and parse a single configuration file.  This function recurses
+ * to handle "include" directives.
+ *
+ * Input parameters:
+ *	fp: file pointer from AllocateFile for the configuration file to parse
+ *	config_file: absolute or relative path name of the configuration file
+ *	depth: recursion depth (should be 0 in the outermost call)
+ *	elevel: error logging level to use
+ * Input/Output parameters:
+ *	head_p, tail_p: head and tail of linked list of name/value pairs
+ *
+ * *head_p and *tail_p must be initialized, either to NULL or valid pointers
+ * to a ConfigVariable list, before calling the outer recursion level.  Any
+ * name-value pairs read from the input file(s) will be appended to the list.
+ * Error reports will also be appended to the list, if elevel < ERROR.
+ *
+ * Returns TRUE if successful, FALSE if an error occurred.
+ *
+ * Note: this function is used to parse not only postgresql.conf, but
+ * various other configuration files that use the same "name = value"
+ * syntax.  Hence, do not do anything here or in the subsidiary routines
+ * ParseConfigFile/ParseConfigDirectory that assumes we are processing
+ * GUCs specifically.
+ */
+bool
+ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
+			  ConfigVariable **head_p, ConfigVariable **tail_p)
+{
+	volatile bool OK = true;
+	unsigned int save_ConfigFileLineno = ConfigFileLineno;
+	sigjmp_buf *save_GUC_flex_fatal_jmp = GUC_flex_fatal_jmp;
+	sigjmp_buf	flex_fatal_jmp;
+	volatile YY_BUFFER_STATE lex_buffer = NULL;
+	int			errorcount;
+	int			token;
+
+	if (sigsetjmp(flex_fatal_jmp, 1) == 0)
+		GUC_flex_fatal_jmp = &flex_fatal_jmp;
+	else
+	{
+		/*
+		 * Regain control after a fatal, internal flex error.  It may have
+		 * corrupted parser state.  Consequently, abandon the file, but trust
+		 * that the state remains sane enough for yy_delete_buffer().
+		 */
+		pg_log(PG_WARNING, "%s at file \"%s\" line %u\n", GUC_flex_fatal_errmsg,
+				config_file, ConfigFileLineno);
+		record_config_file_error(GUC_flex_fatal_errmsg,
+								 config_file, ConfigFileLineno,
+								 head_p, tail_p);
+		OK = false;
+		goto cleanup;
+	}
+
+	/*
+	 * Parse
+	 */
+	ConfigFileLineno = 1;
+	errorcount = 0;
+
+	lex_buffer = yy_create_buffer(fp, YY_BUF_SIZE);
+	yy_switch_to_buffer(lex_buffer);
+
+	/* This loop iterates once per logical line */
+	while ((token = yylex()))
+	{
+		char	   *opt_name = NULL;
+		char	   *opt_value = NULL;
+		ConfigVariable *item;
+
+		if (token == GUC_EOL)	/* empty or comment line */
+			continue;
+
+		/* first token on line is option name */
+		if (token != GUC_ID && token != GUC_QUALIFIED_ID)
+			goto parse_error;
+		opt_name = pstrdup(yytext);
+
+		/* next we have an optional equal sign; discard if present */
+		token = yylex();
+		if (token == GUC_EQUALS)
+			token = yylex();
+
+		/* now we must have the option value */
+		if (token != GUC_ID &&
+			token != GUC_STRING &&
+			token != GUC_INTEGER &&
+			token != GUC_REAL &&
+			token != GUC_UNQUOTED_STRING)
+			goto parse_error;
+		if (token == GUC_STRING)	/* strip quotes and escapes */
+			opt_value = GUC_scanstr(yytext);
+		else
+			opt_value = pstrdup(yytext);
+
+		/* now we'd like an end of line, or possibly EOF */
+		token = yylex();
+		if (token != GUC_EOL)
+		{
+			if (token != 0)
+				goto parse_error;
+			/* treat EOF like \n for line numbering purposes, cf bug 4752 */
+			ConfigFileLineno++;
+		}
+
+		/* OK, process the option name and value */
+		if (guc_name_compare(opt_name, "include_dir") == 0)
+		{
+			/*
+			 * An include_dir directive isn't a variable and should be
+			 * processed immediately.
+			 */
+			if (!ParseConfigDirectory(opt_value,
+									  config_file, ConfigFileLineno - 1,
+									  depth + 1, elevel,
+									  head_p, tail_p))
+				OK = false;
+			yy_switch_to_buffer(lex_buffer);
+			pfree(opt_name);
+			pfree(opt_value);
+		}
+		else if (guc_name_compare(opt_name, "include_if_exists") == 0)
+		{
+			/*
+			 * An include_if_exists directive isn't a variable and should be
+			 * processed immediately.
+			 */
+			if (!ParseConfigFile(opt_value, false,
+								 config_file, ConfigFileLineno - 1,
+								 depth + 1, elevel,
+								 head_p, tail_p))
+				OK = false;
+			yy_switch_to_buffer(lex_buffer);
+			pfree(opt_name);
+			pfree(opt_value);
+		}
+		else if (guc_name_compare(opt_name, "include") == 0)
+		{
+			/*
+			 * An include directive isn't a variable and should be processed
+			 * immediately.
+			 */
+			if (!ParseConfigFile(opt_value, true,
+								 config_file, ConfigFileLineno - 1,
+								 depth + 1, elevel,
+								 head_p, tail_p))
+				OK = false;
+			yy_switch_to_buffer(lex_buffer);
+			pfree(opt_name);
+			pfree(opt_value);
+		}
+		else
+		{
+			/* ordinary variable, append to list */
+			item = palloc(sizeof *item);
+			item->name = opt_name;
+			item->value = opt_value;
+			item->errmsg = NULL;
+			item->filename = pstrdup(config_file);
+			item->sourceline = ConfigFileLineno - 1;
+			item->ignore = false;
+			item->applied = false;
+			item->next = NULL;
+			if (*head_p == NULL)
+				*head_p = item;
+			else
+				(*tail_p)->next = item;
+			*tail_p = item;
+		}
+
+		/* break out of loop if read EOF, else loop for next line */
+		if (token == 0)
+			break;
+		continue;
+
+parse_error:
+		/* release storage if we allocated any on this line */
+		if (opt_name)
+			pfree(opt_name);
+		if (opt_value)
+			pfree(opt_value);
+
+		/* report the error */
+		if (token == GUC_EOL || token == 0)
+		{
+			pg_log(PG_WARNING, "syntax error in file \"%s\" line %u, near end of line\n",
+					config_file, ConfigFileLineno - 1);
+			record_config_file_error("syntax error",
+									 config_file, ConfigFileLineno - 1,
+									 head_p, tail_p);
+		}
+		else
+		{
+			pg_log(PG_WARNING, "syntax error in file \"%s\" line %u, near token \"%s\"\n",
+				config_file, ConfigFileLineno, yytext);
+			record_config_file_error("syntax error",
+									 config_file, ConfigFileLineno,
+									 head_p, tail_p);
+		}
+		OK = false;
+		errorcount++;
+
+		/*
+		 * To avoid producing too much noise when fed a totally bogus file,
+		 * give up after 100 syntax errors per file (an arbitrary number).
+		 */
+		if (errorcount >= 100)
+		{
+			pg_log(PG_WARNING, "too many syntax errors found, abandoning file \"%s\"\n",
+					config_file);
+			break;
+		}
+
+		/* resync to next end-of-line or EOF */
+		while (token != GUC_EOL && token != 0)
+			token = yylex();
+		/* break out of loop on EOF */
+		if (token == 0)
+			break;
+	}
+
+cleanup:
+	yy_delete_buffer(lex_buffer);
+	/* Each recursion level must save and restore these static variables. */
+	ConfigFileLineno = save_ConfigFileLineno;
+	GUC_flex_fatal_jmp = save_GUC_flex_fatal_jmp;
+	return OK;
+}
+
+/*
+ * Read and parse all config files in a subdirectory in alphabetical order
+ *
+ * includedir is the absolute or relative path to the subdirectory to scan;
+ * it is resolved via AbsoluteConfigLocation() relative to calling_file.
+ *
+ * calling_file/calling_lineno identify the source of the request.
+ * Pass NULL/0 if not recursing from an inclusion request.
+ *
+ * depth, elevel, head_p and tail_p are handed through unchanged to
+ * ParseConfigFile() for every file found.
+ *
+ * Returns true if the directory could be scanned and every candidate file
+ * was parsed successfully.  Returns false if the directory cannot be opened,
+ * a candidate file cannot be stat'ed (both cases log a warning and record
+ * an error entry), or ParseConfigFile() fails; scanning stops at the first
+ * failure.
+ *
+ * See ParseConfigFp for further details.
+ */
+bool
+ParseConfigDirectory(const char *includedir,
+					 const char *calling_file, int calling_lineno,
+					 int depth, int elevel,
+					 ConfigVariable **head_p,
+					 ConfigVariable **tail_p)
+{
+	char	   *directory;
+	DIR		   *d;
+	struct dirent *de;
+	char	  **filenames = NULL;	/* NULL until allocated, for cleanup */
+	int			num_filenames = 0;
+	int			size_filenames;
+	bool		status;
+	int			i;
+
+	directory = AbsoluteConfigLocation(includedir, calling_file);
+	d = opendir(directory);
+	if (d == NULL)
+	{
+		pg_log(PG_WARNING, "could not open configuration directory \"%s\"\n",
+			directory);
+		record_config_file_error(psprintf("could not open directory \"%s\"",
+										  directory),
+								 calling_file, calling_lineno,
+								 head_p, tail_p);
+		status = false;
+		goto cleanup;
+	}
+
+	/*
+	 * Read the directory and put the filenames in an array, so we can sort
+	 * them prior to processing the contents.
+	 */
+	size_filenames = 32;
+	filenames = (char **) palloc(size_filenames * sizeof(char *));
+
+	while ((de = readdir(d)) != NULL)
+	{
+		struct stat st;
+		char		filename[MAXPGPATH];
+
+		/*
+		 * Only parse files with names ending in ".conf".  Explicitly reject
+		 * files starting with ".".  This excludes things like "." and "..",
+		 * as well as typical hidden files, backup files, and editor debris.
+		 */
+		if (strlen(de->d_name) < 6)
+			continue;
+		if (de->d_name[0] == '.')
+			continue;
+		if (strcmp(de->d_name + strlen(de->d_name) - 5, ".conf") != 0)
+			continue;
+
+		join_path_components(filename, directory, de->d_name);
+		canonicalize_path(filename);
+		if (stat(filename, &st) == 0)
+		{
+			if (!S_ISDIR(st.st_mode))
+			{
+				/* Add file to array, increasing its size in blocks of 32 */
+				if (num_filenames >= size_filenames)
+				{
+					size_filenames += 32;
+					filenames = (char **) repalloc(filenames,
+											size_filenames * sizeof(char *));
+				}
+				filenames[num_filenames] = pstrdup(filename);
+				num_filenames++;
+			}
+		}
+		else
+		{
+			/*
+			 * stat does not care about permissions, so the most likely reason
+			 * a file can't be accessed now is if it was removed between the
+			 * directory listing and now.
+			 */
+			pg_log(PG_WARNING, "could not stat file \"%s\"\n", filename);
+			record_config_file_error(psprintf("could not stat file \"%s\"",
+											  filename),
+									 calling_file, calling_lineno,
+									 head_p, tail_p);
+			status = false;
+			goto cleanup;
+		}
+	}
+
+	if (num_filenames > 0)
+	{
+		qsort(filenames, num_filenames, sizeof(char *), pg_qsort_strcmp);
+		for (i = 0; i < num_filenames; i++)
+		{
+			if (!ParseConfigFile(filenames[i], true,
+								 calling_file, calling_lineno,
+								 depth, elevel,
+								 head_p, tail_p))
+			{
+				status = false;
+				goto cleanup;
+			}
+		}
+	}
+	status = true;
+
+cleanup:
+	if (d)
+		closedir(d);
+
+	/*
+	 * Release the collected file names.  This is frontend code, so there is
+	 * no memory context that would reclaim them for us.  NOTE(review): this
+	 * relies on ParseConfigFile() not keeping the pointer it was given; the
+	 * parser stores its own pstrdup'd copy of the file name -- confirm.
+	 */
+	if (filenames != NULL)
+	{
+		for (i = 0; i < num_filenames; i++)
+			pfree(filenames[i]);
+		pfree(filenames);
+	}
+	pfree(directory);
+	return status;
+}
+
+/*
+ * Release a whole chain of ConfigVariables, including the names and the
+ * values, by handing each element to FreeConfigVariable().
+ *
+ * A NULL list is accepted and results in no work.
+ */
+void
+FreeConfigVariables(ConfigVariable *list)
+{
+	ConfigVariable *cur;
+	ConfigVariable *following;
+
+	for (cur = list; cur != NULL; cur = following)
+	{
+		/* grab the link first: FreeConfigVariable() releases cur itself */
+		following = cur->next;
+		FreeConfigVariable(cur);
+	}
+}
+
+/*
+ * Release one ConfigVariable: its name, value, errmsg and filename strings
+ * (whichever of them are set), followed by the struct itself.
+ */
+static void
+FreeConfigVariable(ConfigVariable *item)
+{
+	/* Every string member is optional; skip the ones that are NULL. */
+	if (item->name != NULL)
+	{
+		pfree(item->name);
+	}
+	if (item->value != NULL)
+	{
+		pfree(item->value);
+	}
+	if (item->errmsg != NULL)
+	{
+		pfree(item->errmsg);
+	}
+	if (item->filename != NULL)
+	{
+		pfree(item->filename);
+	}
+
+	pfree(item);
+}
+
+
+/*
+ *		scanstr
+ *
+ * Convert a single-quoted token into its plain string value: the enclosing
+ * quotes are dropped, each embedded '' pair collapses to one quote, and
+ * backslash escapes (\b \f \n \r \t, up to three octal digits, or any other
+ * character taken literally) are decoded.
+ *
+ * The input must begin and end with a single quote; this is only checked
+ * by assertions.
+ *
+ * The result is palloc'd and should eventually be pfree'd by the caller.
+ */
+static char *
+GUC_scanstr(const char *s)
+{
+	char	   *result;
+	int			srclen;
+	int			in;
+	int			out = 0;
+
+	Assert(s != NULL && s[0] == '\'');
+	srclen = strlen(s);
+	Assert(srclen >= 2);
+	Assert(s[srclen - 1] == '\'');
+
+	/* Step past the opening quote; the closing one is dealt with at the end */
+	s++;
+	srclen--;
+
+	/*
+	 * srclen still counts the closing quote, and the output can never be
+	 * longer than the input, so this leaves room for the terminator.
+	 */
+	result = palloc(srclen);
+
+	in = 0;
+	while (in < srclen)
+	{
+		char		c = s[in];
+
+		if (c == '\\')
+		{
+			/* look at the character that follows the backslash */
+			c = s[++in];
+
+			if (c >= '0' && c <= '7')
+			{
+				/* consume at most three octal digits */
+				long		octal = 0;
+				int			ndigits = 0;
+
+				while (s[in + ndigits] >= '0' &&
+					   s[in + ndigits] <= '7' &&
+					   ndigits < 3)
+				{
+					octal = (octal << 3) + (s[in + ndigits] - '0');
+					ndigits++;
+				}
+				/* leave "in" on the last digit consumed */
+				in += ndigits - 1;
+				c = (char) octal;
+			}
+			else
+			{
+				switch (c)
+				{
+					case 'b':
+						c = '\b';
+						break;
+					case 'f':
+						c = '\f';
+						break;
+					case 'n':
+						c = '\n';
+						break;
+					case 'r':
+						c = '\r';
+						break;
+					case 't':
+						c = '\t';
+						break;
+					default:
+						/* any other escaped character stands for itself */
+						break;
+				}
+			}
+		}
+		else if (c == '\'' && s[in + 1] == '\'')
+		{
+			/* doubled quote: skip the first, emit a single quote */
+			in++;
+		}
+
+		result[out] = c;
+		out++;
+		in++;
+	}
+
+	/* The closing quote was copied too; overwrite it with the terminator */
+	Assert(out > 0 && out <= srclen);
+	out--;
+	result[out] = '\0';
+
+	return result;
+}

Reply via email to