At 2015-05-27 11:46:39 +0530, a...@2ndquadrant.com wrote:
>
> I'm trying a couple of approaches to that (e.g. using readdir directly
> instead of ReadDir), but other suggestions are welcome.

Here's what that looks like, but not yet fully tested.

-- Abhijit
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 087b6be..6956e88 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -11609,16 +11609,41 @@ SetWalWriterSleeping(bool sleeping)
 static void
 fsync_pgdata(char *datadir)
 {
+	bool		xlog_is_symlink;
+	char		pg_xlog[MAXPGPATH];
+	char		pg_tblspc[MAXPGPATH];
+	struct stat st;
+
 	if (!enableFsync)
 		return;
 
+	snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", datadir);
+	snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", datadir);
+
+	if (lstat(pg_xlog, &st) < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				errmsg("could not stat directory \"pg_xlog\": %m")));
+
+	xlog_is_symlink = false;
+#ifndef WIN32
+	if (S_ISLNK(st.st_mode))
+		xlog_is_symlink = true;
+#else
+	if (pgwin32_is_junction(subpath))
+		xlog_is_symlink = true;
+#endif
+
 	/*
 	 * If possible, hint to the kernel that we're soon going to fsync the data
 	 * directory and its contents.
 	 */
 #if defined(HAVE_SYNC_FILE_RANGE) || \
 	(defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED))
-	walkdir(datadir, pre_sync_fname);
+	walkdir(DEBUG1, datadir, pre_sync_fname);
+	if (xlog_is_symlink)
+		walkdir(DEBUG1, pg_xlog, pre_sync_fname);
+	walktblspc_links(DEBUG1, pg_tblspc, pre_sync_fname);
 #endif
 
 	/*
@@ -11628,5 +11653,8 @@ fsync_pgdata(char *datadir)
 	 * file fsyncs don't guarantee that the directory entry for the file is
 	 * synced.
 	 */
-	walkdir(datadir, fsync_fname);
+	walkdir(WARNING, datadir, fsync_fname_ext);
+	if (xlog_is_symlink)
+		walkdir(DEBUG1, pg_xlog, fsync_fname_ext);
+	walktblspc_links(WARNING, pg_tblspc, fsync_fname_ext);
 }
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 68d43c6..711b4b7 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2443,48 +2443,113 @@ looks_like_temp_rel_name(const char *name)
 /*
  * Hint to the OS that it should get ready to fsync() this file.
  *
- * Adapted from pre_sync_fname in initdb.c
+ * Ignores errors trying to open unreadable files, and logs other errors at a
+ * caller-specified level.
  */
 void
-pre_sync_fname(char *fname, bool isdir)
+pre_sync_fname(int elevel, char *fname, bool isdir)
 {
 	int			fd;
 
 	fd = BasicOpenFile(fname, O_RDONLY | PG_BINARY, 0);
 
+	if (fd < 0)
+	{
+		if (errno == EACCES || (isdir && errno == EISDIR))
+			return;
+
+#ifdef ETXTBSY
+		if (errno == ETXTBSY)
+			return;
+#endif
+
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", fname)));
+	}
+
+	(void) pg_flush_data(fd, 0, 0);
+	(void) close(fd);
+}
+
+/*
+ * fsync_fname_ext -- Try to fsync a file or directory
+ *
+ * Ignores errors trying to open unreadable files, or trying to fsync
+ * directories on systems where that isn't allowed/required, and logs other
+ * errors at a caller-specified level.
+ */
+void
+fsync_fname_ext(int elevel, char *fname, bool isdir)
+{
+	int			fd;
+	int			returncode;
+
 	/*
-	 * Some OSs don't allow us to open directories at all (Windows returns
-	 * EACCES)
+	 * Some OSs require directories to be opened read-only whereas other
+	 * systems don't allow us to fsync files opened read-only; so we need both
+	 * cases here
 	 */
-	if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES))
-		return;
+	if (!isdir)
+		fd = OpenTransientFile(fname,
+							   O_RDWR | PG_BINARY,
+							   S_IRUSR | S_IWUSR);
+	else
+		fd = OpenTransientFile(fname,
+							   O_RDONLY | PG_BINARY,
+							   S_IRUSR | S_IWUSR);
 
 	if (fd < 0)
-		ereport(FATAL,
-				(errmsg("could not open file \"%s\": %m",
-						fname)));
+	{
+		if (errno == EACCES || (isdir && errno == EISDIR))
+			return;
 
-	pg_flush_data(fd, 0, 0);
+#ifdef ETXTBSY
+		if (errno == ETXTBSY)
+			return;
+#endif
 
-	close(fd);
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", fname)));
+	}
+
+	returncode = pg_fsync(fd);
+
+	/*
+	 * Some OSes don't allow us to fsync directories at all, so we can ignore
+	 * those errors. Anything else needs to be logged.
+	 */
+	if (returncode != 0 && !(isdir && errno == EBADF))
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", fname)));
+
+	CloseTransientFile(fd);
 }
 
 /*
  * walkdir: recursively walk a directory, applying the action to each
- * regular file and directory (including the named directory itself)
- * and following symbolic links.
+ * regular file and directory (including the named directory itself).
  *
- * NB: There is another version of walkdir in initdb.c, but that version
- * behaves differently with respect to symbolic links.  Caveat emptor!
+ * Adapted from walkdir in initdb.c
  */
 void
-walkdir(char *path, void (*action) (char *fname, bool isdir))
+walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir))
 {
 	DIR		   *dir;
 	struct dirent *de;
 
 	dir = AllocateDir(path);
-	while ((de = ReadDir(dir, path)) != NULL)
+	if (dir == NULL)
+	{
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not open directory \"%s\": %m", path)));
+		return;
+	}
+
+	while (errno = 0, (de = readdir(dir)) != NULL)
 	{
 		char		subpath[MAXPGPATH];
 		struct stat fst;
@@ -2498,59 +2563,66 @@ walkdir(char *path, void (*action) (char *fname, bool isdir))
 		snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name);
 
 		if (lstat(subpath, &fst) < 0)
-			ereport(ERROR,
+			ereport(elevel,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", subpath)));
 
 		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false);
+			(*action) (elevel, subpath, false);
 		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action);
-#ifndef WIN32
-		else if (S_ISLNK(fst.st_mode))
-#else
-		else if (pgwin32_is_junction(subpath))
-#endif
-		{
-#if defined(HAVE_READLINK) || defined(WIN32)
-			char		linkpath[MAXPGPATH];
-			int			len;
-			struct stat lst;
+			walkdir(elevel, subpath, action);
+	}
 
-			len = readlink(subpath, linkpath, sizeof(linkpath) - 1);
-			if (len < 0)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not read symbolic link \"%s\": %m",
-								subpath)));
+	if (errno)
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not read directory \"%s\": %m", path)));
 
-			if (len >= sizeof(linkpath) - 1)
-				ereport(ERROR,
-						(errmsg("symbolic link \"%s\" target is too long",
-								subpath)));
+	FreeDir(dir);
 
-			linkpath[len] = '\0';
+	(*action) (elevel, path, true);
+}
 
-			if (lstat(linkpath, &lst) == 0)
-			{
-				if (S_ISREG(lst.st_mode))
-					(*action) (linkpath, false);
-				else if (S_ISDIR(lst.st_mode))
-					walkdir(subpath, action);
-			}
-			else if (errno != ENOENT)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not stat file \"%s\": %m", linkpath)));
-#else
-			ereport(WARNING,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("this platform does not support symbolic links; ignoring \"%s\"",
-							subpath)));
-#endif
-		}
+/*
+ * walktblspc_links: call walkdir on each entry under the given
+ * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist.
+ *
+ * Adapted from walktblspc_links in initdb.c
+ */
+void
+walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir))
+{
+	DIR		   *dir;
+	struct dirent *de;
+	char		subpath[MAXPGPATH];
+
+	dir = AllocateDir(path);
+	if (dir == NULL)
+	{
+		if (errno != ENOENT)
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not open directory \"%s\": %m", path)));
+		return;
 	}
-	FreeDir(dir);
 
-	(*action) (path, true);
+	while (errno = 0, (de = readdir(dir)) != NULL)
+	{
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
+			continue;
+
+		/* fsync the version specific tablespace subdirectory */
+		snprintf(subpath, sizeof(subpath), "%s/%s/%s",
+				 path, de->d_name, TABLESPACE_VERSION_DIRECTORY);
+
+		walkdir(elevel, subpath, action);
+	}
+
+	if (errno)
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not read directory \"%s\": %m", path)));
+
+	FreeDir(dir);
 }
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 5b563a6..322fd40 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -114,8 +114,10 @@ extern int	pg_fsync_writethrough(int fd);
 extern int	pg_fdatasync(int fd);
 extern int	pg_flush_data(int fd, off_t offset, off_t amount);
 extern void fsync_fname(char *fname, bool isdir);
-extern void pre_sync_fname(char *fname, bool isdir);
-extern void walkdir(char *path, void (*action) (char *fname, bool isdir));
+extern void fsync_fname_ext(int elevel, char *fname, bool isdir);
+extern void pre_sync_fname(int elevel, char *fname, bool isdir);
+extern void walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir));
+extern void walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir));
 
 /* Filename components for OpenTemporaryFile */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to