On Wed, Feb 19, 2020 at 12:37:00PM -0600, David Steele wrote:
> On 2/19/20 2:13 AM, Michael Paquier wrote:
>> Please note that pg_internal.init is listed within noChecksumFiles in
>> basebackup.c, so we would miss any temporary pg_internal.init.PID if
>> we don't check after the file prefix and the base backup would issue
>> extra WARNING messages, potentially masking messages that could
>> matter.  So let's fix that as well.
> 
> Agreed.  Though, I think pg_internal.init.XX should be excluded from the
> backup as well.

Sure.  That's the intention.  pg_rewind, pg_checksums and basebackup.c
are all the things on my list.

> As far as I can see, the pg_internal.init.XX will not be cleaned up by
> PostgreSQL on startup.  I've only tested this in 9.6 so far, but I don't see
> any differences in the code since then that would lead to better behavior.
> Perhaps that's also something we should fix?

Not sure that it is worth spending cycles on that at the beginning of
recovery as when a mapping file is written its temporary entry
truncates any existing one present matching its name.

> I'm really not a fan of a blind prefix match.  I think we should stick with
> only excluding files that are created by Postgres.

Thinking more on that, excluding any backup_label with a custom suffix
worries me as it could cause a potential breakage for exiting backup
solutions.  So attached is an updated patch making things in a
smarter way: I have added to the exclusion lists the possibility to
match an entry based on its prefix, or not, the choice being optional.
This solves the problem with pg_internal.PID and is careful to not
exclude unnecessary entries like suffixed backup labels or such.  This
leads to some extra duplication within pg_rewind, basebackup.c and
pg_checksums but I think we can live with that, and that makes
back-patching simpler.  Refactoring is still tricky though as it
relates to the use of paths across the backend and the frontend..

> So backup_label.old and
> tablespace_map.old should just be added to the exclude list.  That's how we
> have it in pgBackRest.

That would be a behavior change.  We could change that on HEAD, but I
don't think that this can be back-patched as this does not cause an
actual problem.

For now, my proposal is to fix the prefix first, and then let's look
at the business with tablespaces where needed. 
--
Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index dea8aab45e..c03bc0c00b 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -121,6 +121,18 @@ static long long int total_checksum_failures;
 /* Do not verify checksums. */
 static bool noverify_checksums = false;
 
+/*
+ * Definition of one item part of an exclusion list, used for checksum
+ * or base backup items.  "name" is the name of the file or path to
+ * check for exclusion.  If "match_prefix" is true, any items matching the
+ * name as prefix are excluded.
+ */
+struct exclude_list_item
+{
+	const char *name;
+	bool		match_prefix;
+};
+
 /*
  * The contents of these directories are removed or recreated during server
  * start so they are not included in backups.  The directories themselves are
@@ -170,16 +182,19 @@ static const char *const excludeDirContents[] =
 /*
  * List of files excluded from backups.
  */
-static const char *const excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
 {
 	/* Skip auto conf temporary file. */
-	PG_AUTOCONF_FILENAME ".tmp",
+	{PG_AUTOCONF_FILENAME ".tmp", false},
 
 	/* Skip current log file temporary file */
-	LOG_METAINFO_DATAFILE_TMP,
+	{LOG_METAINFO_DATAFILE_TMP, false},
 
-	/* Skip relation cache because it is rebuilt on startup */
-	RELCACHE_INIT_FILENAME,
+	/*
+	 * Skip relation cache because it is rebuilt on startup.  This includes
+	 * temporary files.
+	 */
+	{RELCACHE_INIT_FILENAME, true},
 
 	/*
 	 * If there's a backup_label or tablespace_map file, it belongs to a
@@ -187,14 +202,14 @@ static const char *const excludeFiles[] =
 	 * for this backup.  Our backup_label/tablespace_map is injected into the
 	 * tar separately.
 	 */
-	BACKUP_LABEL_FILE,
-	TABLESPACE_MAP,
+	{BACKUP_LABEL_FILE, false},
+	{TABLESPACE_MAP, false},
 
-	"postmaster.pid",
-	"postmaster.opts",
+	{"postmaster.pid", false},
+	{"postmaster.opts", false},
 
 	/* end of list */
-	NULL
+	{NULL, false}
 };
 
 /*
@@ -203,16 +218,15 @@ static const char *const excludeFiles[] =
  * Note: this list should be kept in sync with what pg_checksums.c
  * includes.
  */
-static const char *const noChecksumFiles[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
+static const struct exclude_list_item noChecksumFiles[] = {
+	{"pg_control", false},
+	{"pg_filenode.map", false},
+	{"pg_internal.init", true},
+	{"PG_VERSION", false},
 #ifdef EXEC_BACKEND
-	"config_exec_params",
-	"config_exec_params.new",
+	{"config_exec_params", true},
 #endif
-	NULL,
+	{NULL, false}
 };
 
 /*
@@ -1082,13 +1096,27 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 
 		/* Scan for files that should be excluded */
 		excludeFound = false;
-		for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+		for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
 		{
-			if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
+			if (excludeFiles[excludeIdx].match_prefix)
 			{
-				elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
-				excludeFound = true;
-				break;
+				if (strncmp(de->d_name, excludeFiles[excludeIdx].name,
+							strlen(excludeFiles[excludeIdx].name)) == 0)
+				{
+					elog(DEBUG1, "file \"%s\" matching prefix \"%s\" excluded from backup",
+						 de->d_name, excludeFiles[excludeIdx].name);
+					excludeFound = true;
+					break;
+				}
+			}
+			else
+			{
+				if (strcmp(de->d_name, excludeFiles[excludeIdx].name) == 0)
+				{
+					elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
+					excludeFound = true;
+					break;
+				}
 			}
 		}
 
@@ -1317,17 +1345,28 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
 static bool
 is_checksummed_file(const char *fullpath, const char *filename)
 {
-	const char *const *f;
-
 	/* Check that the file is in a tablespace */
 	if (strncmp(fullpath, "./global/", 9) == 0 ||
 		strncmp(fullpath, "./base/", 7) == 0 ||
 		strncmp(fullpath, "/", 1) == 0)
 	{
-		/* Compare file against noChecksumFiles skiplist */
-		for (f = noChecksumFiles; *f; f++)
-			if (strcmp(*f, filename) == 0)
-				return false;
+		int			excludeIdx;
+
+		/* Compare file against noChecksumFiles skip list */
+		for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
+		{
+			if (noChecksumFiles[excludeIdx].match_prefix)
+			{
+				if (strncmp(filename, noChecksumFiles[excludeIdx].name,
+							strlen(noChecksumFiles[excludeIdx].name)) == 0)
+					return false;
+			}
+			else
+			{
+				if (strcmp(noChecksumFiles[excludeIdx].name, filename) == 0)
+					return false;
+			}
+		}
 
 		return true;
 	}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index b7d36b65dd..3c70499feb 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 106;
+use Test::More tests => 107;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -65,8 +65,8 @@ $node->restart;
 
 # Write some files to test that they are not copied.
 foreach my $filename (
-	qw(backup_label tablespace_map postgresql.auto.conf.tmp current_logfiles.tmp)
-  )
+	qw(backup_label tablespace_map postgresql.auto.conf.tmp
+	current_logfiles.tmp global/pg_internal.init.123))
 {
 	open my $file, '>>', "$pgdata/$filename";
 	print $file "DONOTCOPY";
@@ -135,7 +135,7 @@ foreach my $dirname (
 # These files should not be copied.
 foreach my $filename (
 	qw(postgresql.auto.conf.tmp postmaster.opts postmaster.pid tablespace_map current_logfiles.tmp
-	global/pg_internal.init))
+	global/pg_internal.init global/pg_internal.init.123))
 {
 	ok(!-f "$tempdir/backup/$filename", "$filename not copied");
 }
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 46ee1f1dc3..4c8eb33698 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -91,21 +91,32 @@ usage(void)
 	printf(_("Report bugs to <pgsql-b...@lists.postgresql.org>.\n"));
 }
 
+/*
+ * Definition of one item part of an exclusion list, used for file
+ * entries.  "name" is the name of the file or path to check for
+ * exclusion.  If "match_prefix" is true, any items matching the name
+ * as prefix are excluded.
+ */
+struct exclude_list_item
+{
+	const char *name;
+	bool		match_prefix;
+};
+
 /*
  * List of files excluded from checksum validation.
  *
  * Note: this list should be kept in sync with what basebackup.c includes.
  */
-static const char *const skip[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
+static const struct exclude_list_item skip[] = {
+	{"pg_control", false},
+	{"pg_filenode.map", false},
+	{"pg_internal.init", true},
+	{"PG_VERSION", false},
 #ifdef EXEC_BACKEND
-	"config_exec_params",
-	"config_exec_params.new",
+	{"config_exec_params", true},
 #endif
-	NULL,
+	{NULL, false}
 };
 
 /*
@@ -157,11 +168,22 @@ progress_report(bool force)
 static bool
 skipfile(const char *fn)
 {
-	const char *const *f;
+	int			excludeIdx;
 
-	for (f = skip; *f; f++)
-		if (strcmp(*f, fn) == 0)
-			return true;
+	for (excludeIdx = 0; skip[excludeIdx].name != NULL; excludeIdx++)
+	{
+		if (skip[excludeIdx].match_prefix)
+		{
+			if (strncmp(skip[excludeIdx].name, fn,
+						strlen(skip[excludeIdx].name)) == 0)
+				return true;
+		}
+		else
+		{
+			if (strcmp(skip[excludeIdx].name, fn) == 0)
+				return true;
+		}
+	}
 
 	return false;
 }
diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl
index 59228b916c..83a730ea94 100644
--- a/src/bin/pg_checksums/t/002_actions.pl
+++ b/src/bin/pg_checksums/t/002_actions.pl
@@ -111,7 +111,9 @@ append_to_file "$pgdata/global/99999_vm.123",   "";
 # should be ignored by the scan.
 append_to_file "$pgdata/global/pgsql_tmp_123", "foo";
 mkdir "$pgdata/global/pgsql_tmp";
-append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo";
+append_to_file "$pgdata/global/pgsql_tmp/1.1",        "foo";
+append_to_file "$pgdata/global/pg_internal.init",     "foo";
+append_to_file "$pgdata/global/pg_internal.init.123", "foo";
 
 # Enable checksums.
 command_ok([ 'pg_checksums', '--enable', '--no-sync', '-D', $pgdata ],
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index fd14844eec..67977ada99 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -30,6 +30,18 @@ static int	final_filemap_cmp(const void *a, const void *b);
 static void filemap_list_to_array(filemap_t *map);
 static bool check_file_excluded(const char *path, bool is_source);
 
+/*
+ * Definition of one item part of an exclusion list, used for file
+ * entries.  "name" is the name of the file or path to check for exclusion.
+ * If "match_prefix" is true, any items matching the name as prefix
+ * are excluded.
+ */
+struct exclude_list_item
+{
+	const char *name;
+	bool		match_prefix;
+};
+
 /*
  * The contents of these directories are removed or recreated during server
  * start so they are not included in data processed by pg_rewind.
@@ -78,32 +90,34 @@ static const char *excludeDirContents[] =
 };
 
 /*
- * List of files excluded from filemap processing.
+ * List of files excluded from filemap processing.   Files are excluded
+ * if their prefix match.
  */
-static const char *excludeFiles[] =
+static const struct exclude_list_item excludeFiles[] =
 {
 	/* Skip auto conf temporary file. */
-	"postgresql.auto.conf.tmp", /* defined as PG_AUTOCONF_FILENAME */
+	{"postgresql.auto.conf.tmp", false},	/* defined as PG_AUTOCONF_FILENAME */
 
 	/* Skip current log file temporary file */
-	"current_logfiles.tmp",		/* defined as LOG_METAINFO_DATAFILE_TMP */
+	{"current_logfiles.tmp", false},	/* defined as
+										 * LOG_METAINFO_DATAFILE_TMP */
 
 	/* Skip relation cache because it is rebuilt on startup */
-	"pg_internal.init",			/* defined as RELCACHE_INIT_FILENAME */
+	{"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
 
 	/*
 	 * If there's a backup_label or tablespace_map file, it belongs to a
 	 * backup started by the user with pg_start_backup().  It is *not* correct
 	 * for this backup.  Our backup_label is written later on separately.
 	 */
-	"backup_label",				/* defined as BACKUP_LABEL_FILE */
-	"tablespace_map",			/* defined as TABLESPACE_MAP */
+	{"backup_label", false},	/* defined as BACKUP_LABEL_FILE */
+	{"tablespace_map", false},	/* defined as TABLESPACE_MAP */
 
-	"postmaster.pid",
-	"postmaster.opts",
+	{"postmaster.pid", false},
+	{"postmaster.opts", false},
 
 	/* end of list */
-	NULL
+	{NULL, false}
 };
 
 /*
@@ -496,22 +510,40 @@ check_file_excluded(const char *path, bool is_source)
 	const char *filename;
 
 	/* check individual files... */
-	for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+	for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
 	{
 		filename = last_dir_separator(path);
 		if (filename == NULL)
 			filename = path;
 		else
 			filename++;
-		if (strcmp(filename, excludeFiles[excludeIdx]) == 0)
+
+		if (excludeFiles[excludeIdx].match_prefix)
 		{
-			if (is_source)
-				pg_log_debug("entry \"%s\" excluded from source file list",
-							 path);
-			else
-				pg_log_debug("entry \"%s\" excluded from target file list",
-							 path);
-			return true;
+			if (strncmp(filename, excludeFiles[excludeIdx].name,
+						strlen(excludeFiles[excludeIdx].name)) == 0)
+			{
+				if (is_source)
+					pg_log_debug("entry \"%s\" matching prefix \"%s\" excluded from source file list",
+								 path, excludeFiles[excludeIdx].name);
+				else
+					pg_log_debug("entry \"%s\" matching prefix \"%s\" excluded from target file list",
+								 path, excludeFiles[excludeIdx].name);
+				return true;
+			}
+		}
+		else
+		{
+			if (strcmp(filename, excludeFiles[excludeIdx].name) == 0)
+			{
+				if (is_source)
+					pg_log_debug("entry \"%s\" excluded from source file list",
+								 path);
+				else
+					pg_log_debug("entry \"%s\" excluded from target file list",
+								 path);
+				return true;
+			}
 		}
 	}
 

Attachment: signature.asc
Description: PGP signature

Reply via email to