Hi Konstantin,

On Thu, Aug 30, 2018 at 11:27:31AM -0700, Michael Paquier wrote:
> It seems to me that you are right here, "path" points to
> pg_replslot/$SLOTNAME/state which is a file so the fsync is incorrect.
> I am not sure that we'd want to publish fsync_parent_path out of fd.c
> though, so perhaps we could just save the slot path in a different
> variable and use it?

I have spent more time on this bug, and the code path you have pointed
at is the only one having such an issue.  Attached is a patch to fix the
problem.  It includes the sanity checks I have used to check all code
paths calling fsync_fname() for both the frontend and the backend code.

The checks will not be included in the final fix, still they look useful
so I am planning to start a new thread on the matter as perhaps other
folks have more and/or better ideas.
--
Michael
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 19978d9a9e..4f30904141 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1352,6 +1352,7 @@ RestoreSlotFromDisk(const char *name)
 {
 	ReplicationSlotOnDisk cp;
 	int			i;
+	char		slotdir[MAXPGPATH];
 	char		path[MAXPGPATH + 22];
 	int			fd;
 	bool		restored = false;
@@ -1361,13 +1362,14 @@ RestoreSlotFromDisk(const char *name)
 	/* no need to lock here, no concurrent access allowed yet */
 
 	/* delete temp file if it exists */
-	sprintf(path, "pg_replslot/%s/state.tmp", name);
+	sprintf(slotdir, "pg_replslot/%s", name);
+	sprintf(path, "%s/state.tmp", slotdir);
 	if (unlink(path) < 0 && errno != ENOENT)
 		ereport(PANIC,
 				(errcode_for_file_access(),
 				 errmsg("could not remove file \"%s\": %m", path)));
 
-	sprintf(path, "pg_replslot/%s/state", name);
+	sprintf(path, "%s/state", slotdir);
 
 	elog(DEBUG1, "restoring replication slot from \"%s\"", path);
 
@@ -1402,7 +1404,7 @@ RestoreSlotFromDisk(const char *name)
 
 	/* Also sync the parent directory */
 	START_CRIT_SECTION();
-	fsync_fname(path, true);
+	fsync_fname(slotdir, true);
 	END_CRIT_SECTION();
 
 	/* read part of statefile that's guaranteed to be version independent */
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 8dd51f1767..49a5640c61 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -574,6 +574,14 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
 void
 fsync_fname(const char *fname, bool isdir)
 {
+#ifdef USE_ASSERT_CHECKING
+	struct stat statbuf;
+
+	stat(fname, &statbuf);
+	Assert((isdir && S_ISDIR(statbuf.st_mode)) ||
+		   (!isdir && !S_ISDIR(statbuf.st_mode)));
+#endif
+
 	fsync_fname_ext(fname, isdir, false, ERROR);
 }
 
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 48876061c3..36095b01af 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -266,6 +266,14 @@ fsync_fname(const char *fname, bool isdir, const char *progname)
 	int			flags;
 	int			returncode;
 
+#ifdef USE_ASSERT_CHECKING
+	struct stat statbuf;
+
+	stat(fname, &statbuf);
+	Assert((isdir && S_ISDIR(statbuf.st_mode)) ||
+		   (!isdir && !S_ISDIR(statbuf.st_mode)));
+#endif
+
 	/*
 	 * Some OSs require directories to be opened read-only whereas other
 	 * systems don't allow us to fsync files opened read-only; so we need both

Attachment: signature.asc
Description: PGP signature

Reply via email to