At 2015-05-27 11:46:39 +0530, a...@2ndquadrant.com wrote: > > I'm trying a couple of approaches to that (e.g. using readdir directly > instead of ReadDir), but other suggestions are welcome.
Here's what that looks like, but not yet fully tested. -- Abhijit
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 087b6be..6956e88 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -11609,16 +11609,41 @@ SetWalWriterSleeping(bool sleeping) static void fsync_pgdata(char *datadir) { + bool xlog_is_symlink; + char pg_xlog[MAXPGPATH]; + char pg_tblspc[MAXPGPATH]; + struct stat st; + if (!enableFsync) return; + snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", datadir); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", datadir); + + if (lstat(pg_xlog, &st) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat directory \"pg_xlog\": %m"))); + + xlog_is_symlink = false; +#ifndef WIN32 + if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; +#else + if (pgwin32_is_junction(subpath)) + xlog_is_symlink = true; +#endif + /* * If possible, hint to the kernel that we're soon going to fsync the data * directory and its contents. */ #if defined(HAVE_SYNC_FILE_RANGE) || \ (defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)) - walkdir(datadir, pre_sync_fname); + walkdir(DEBUG1, datadir, pre_sync_fname); + if (xlog_is_symlink) + walkdir(DEBUG1, pg_xlog, pre_sync_fname); + walktblspc_links(DEBUG1, pg_tblspc, pre_sync_fname); #endif /* @@ -11628,5 +11653,8 @@ fsync_pgdata(char *datadir) * file fsyncs don't guarantee that the directory entry for the file is * synced. */ - walkdir(datadir, fsync_fname); + walkdir(WARNING, datadir, fsync_fname_ext); + if (xlog_is_symlink) + walkdir(DEBUG1, pg_xlog, fsync_fname_ext); + walktblspc_links(WARNING, pg_tblspc, fsync_fname_ext); } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 68d43c6..711b4b7 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2443,48 +2443,113 @@ looks_like_temp_rel_name(const char *name) /* * Hint to the OS that it should get ready to fsync() this file. * - * Adapted from pre_sync_fname in initdb.c + * Ignores errors trying to open unreadable files, and logs other errors at a + * caller-specified level. */ void -pre_sync_fname(char *fname, bool isdir) +pre_sync_fname(int elevel, char *fname, bool isdir) { int fd; fd = BasicOpenFile(fname, O_RDONLY | PG_BINARY, 0); + if (fd < 0) + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", fname))); + } + + (void) pg_flush_data(fd, 0, 0); + (void) close(fd); +} + +/* + * fsync_fname_ext -- Try to fsync a file or directory + * + * Ignores errors trying to open unreadable files, or trying to fsync + * directories on systems where that isn't allowed/required, and logs other + * errors at a caller-specified level. + */ +void +fsync_fname_ext(int elevel, char *fname, bool isdir) +{ + int fd; + int returncode; + /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) + * Some OSs require directories to be opened read-only whereas other + * systems don't allow us to fsync files opened read-only; so we need both + * cases here */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; + if (!isdir) + fd = OpenTransientFile(fname, + O_RDWR | PG_BINARY, + S_IRUSR | S_IWUSR); + else + fd = OpenTransientFile(fname, + O_RDONLY | PG_BINARY, + S_IRUSR | S_IWUSR); if (fd < 0) - ereport(FATAL, - (errmsg("could not open file \"%s\": %m", - fname))); + { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; - pg_flush_data(fd, 0, 0); +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif - close(fd); + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", fname))); + } + + returncode = pg_fsync(fd); + + /* + * Some OSes don't allow us to fsync directories at all, so we can ignore + * those errors. Anything else needs to be logged. + */ + if (returncode != 0 && !(isdir && errno == EBADF)) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", fname))); + + CloseTransientFile(fd); } /* * walkdir: recursively walk a directory, applying the action to each - * regular file and directory (including the named directory itself) - * and following symbolic links. + * regular file and directory (including the named directory itself). * - * NB: There is another version of walkdir in initdb.c, but that version - * behaves differently with respect to symbolic links. Caveat emptor! + * Adapted from walkdir in initdb.c */ void -walkdir(char *path, void (*action) (char *fname, bool isdir)) +walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir)) { DIR *dir; struct dirent *de; dir = AllocateDir(path); - while ((de = ReadDir(dir, path)) != NULL) + if (dir == NULL) + { + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", path))); + return; + } + + while (errno = 0, (de = readdir(dir)) != NULL) { char subpath[MAXPGPATH]; struct stat fst; @@ -2498,59 +2563,66 @@ walkdir(char *path, void (*action) (char *fname, bool isdir)) snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); if (lstat(subpath, &fst) < 0) - ereport(ERROR, + ereport(elevel, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", subpath))); if (S_ISREG(fst.st_mode)) - (*action) (subpath, false); + (*action) (elevel, subpath, false); else if (S_ISDIR(fst.st_mode)) - walkdir(subpath, action); -#ifndef WIN32 - else if (S_ISLNK(fst.st_mode)) -#else - else if (pgwin32_is_junction(subpath)) -#endif - { -#if defined(HAVE_READLINK) || defined(WIN32) - char linkpath[MAXPGPATH]; - int len; - struct stat lst; + walkdir(elevel, subpath, action); + } - len = readlink(subpath, linkpath, sizeof(linkpath) - 1); - if (len < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read symbolic link \"%s\": %m", - subpath))); + if (errno) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not read directory \"%s\": %m", path))); - if (len >= sizeof(linkpath) - 1) - ereport(ERROR, - (errmsg("symbolic link \"%s\" target is too long", - subpath))); + FreeDir(dir); - linkpath[len] = '\0'; + (*action) (elevel, path, true); +} - if (lstat(linkpath, &lst) == 0) - { - if (S_ISREG(lst.st_mode)) - (*action) (linkpath, false); - else if (S_ISDIR(lst.st_mode)) - walkdir(subpath, action); - } - else if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", linkpath))); -#else - ereport(WARNING, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("this platform does not support symbolic links; ignoring \"%s\"", - subpath))); -#endif - } +/* + * walktblspc_links: call walkdir on each entry under the given + * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist. + * + * Adapted from walktblspc_links in initdb.c + */ +void +walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir)) +{ + DIR *dir; + struct dirent *de; + char subpath[MAXPGPATH]; + + dir = AllocateDir(path); + if (dir == NULL) + { + if (errno != ENOENT) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", path))); + return; } - FreeDir(dir); - (*action) (path, true); + while (errno = 0, (de = readdir(dir)) != NULL) + { + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + /* fsync the version specific tablespace subdirectory */ + snprintf(subpath, sizeof(subpath), "%s/%s/%s", + path, de->d_name, TABLESPACE_VERSION_DIRECTORY); + + walkdir(elevel, subpath, action); + } + + if (errno) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not read directory \"%s\": %m", path))); + + FreeDir(dir); } diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 5b563a6..322fd40 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -114,8 +114,10 @@ extern int pg_fsync_writethrough(int fd); extern int pg_fdatasync(int fd); extern int pg_flush_data(int fd, off_t offset, off_t amount); extern void fsync_fname(char *fname, bool isdir); -extern void pre_sync_fname(char *fname, bool isdir); -extern void walkdir(char *path, void (*action) (char *fname, bool isdir)); +extern void fsync_fname_ext(int elevel, char *fname, bool isdir); +extern void pre_sync_fname(int elevel, char *fname, bool isdir); +extern void walkdir(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir)); +extern void walktblspc_links(int elevel, char *path, void (*action) (int elevel, char *fname, bool isdir)); /* Filename components for OpenTemporaryFile */ #define PG_TEMP_FILES_DIR "pgsql_tmp"
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers