At 2015-05-28 17:37:16 -0400, t...@sss.pgh.pa.us wrote: > > I've committed this after some mostly-cosmetic rearrangements.
Thank you. > I have to leave shortly, so I'll look at the initdb cleanup tomorrow. Here's a revision of that patch that's more along the lines of what you committed. It occurred to me that we should probably also silently skip EACCES on opendir and stat/lstat in walkdir. That's what the attached initdb patch does. If you think it's a good idea, there's also a small fixup attached for the backend version too. -- Abhijit
>From 26bb25e99a2954381587bbfe296a50b19a7f047c Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen <a...@2ndquadrant.com> Date: Thu, 28 May 2015 22:02:29 +0530 Subject: Make initdb -S behave like xlog.c:fsync_pgdata() In particular, it should silently skip unreadable/unexpected files in the data directory and follow symlinks only for pg_xlog and under pg_tblspc. --- src/bin/initdb/initdb.c | 305 ++++++++++++++++++++++++------------------------ 1 file changed, 151 insertions(+), 154 deletions(-) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f1c4920..9d5478c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -71,6 +71,13 @@ /* Ideally this would be in a .h file, but it hardly seems worth the trouble */ extern const char *select_default_timezone(const char *share_path); +/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ +#if defined(HAVE_SYNC_FILE_RANGE) +#define PG_FLUSH_DATA_WORKS 1 +#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) +#define PG_FLUSH_DATA_WORKS 1 +#endif + static const char *auth_methods_host[] = {"trust", "reject", "md5", "password", "ident", "radius", #ifdef ENABLE_GSS "gss", @@ -217,10 +224,13 @@ static char **filter_lines_with_token(char **lines, const char *token); #endif static char **readfile(const char *path); static void writefile(char *path, char **lines); -static void walkdir(char *path, void (*action) (char *fname, bool isdir)); -static void walktblspc_links(char *path, void (*action) (char *fname, bool isdir)); -static void pre_sync_fname(char *fname, bool isdir); -static void fsync_fname(char *fname, bool isdir); +static void walkdir(const char *path, + void (*action) (const char *fname, bool isdir), + bool process_symlinks); +#ifdef PG_FLUSH_DATA_WORKS +static void pre_sync_fname(const char *fname, bool isdir); +#endif +static void fsync_fname_ext(const char *fname, bool isdir); static FILE *popen_check(const char *command, const char *mode); static void exit_nicely(void); static char *get_id(void); @@ -248,7 +258,7 @@ static void load_plpgsql(void); static void vacuum_db(void); static void make_template0(void); static void make_postgres(void); -static void perform_fsync(void); +static void fsync_pgdata(void); static void trapsig(int signum); static void check_ok(void); static char *escape_quotes(const char *src); @@ -521,56 +531,58 @@ writefile(char *path, char **lines) * Adapted from copydir() in copydir.c. */ static void -walkdir(char *path, void (*action) (char *fname, bool isdir)) +walkdir(const char *path, + void (*action) (const char *fname, bool isdir), + bool process_symlinks) { DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; + struct dirent *de; dir = opendir(path); if (dir == NULL) { - fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); + if (errno != EACCES) + fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), + progname, path, strerror(errno)); + return; } - while (errno = 0, (direntry = readdir(dir)) != NULL) + while (errno = 0, (de = readdir(dir)) != NULL) { + char subpath[MAXPGPATH]; struct stat fst; + int sret; - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; - snprintf(subpath, MAXPGPATH, "%s/%s", path, direntry->d_name); + snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); + + if (process_symlinks) + sret = stat(subpath, &fst); + else + sret = lstat(subpath, &fst); - if (lstat(subpath, &fst) < 0) + if (sret < 0) { - fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), - progname, subpath, strerror(errno)); - exit_nicely(); + if (errno != EACCES) + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, subpath, strerror(errno)); + continue; } - if (S_ISDIR(fst.st_mode)) - walkdir(subpath, action); - else if (S_ISREG(fst.st_mode)) + if (S_ISREG(fst.st_mode)) (*action) (subpath, false); + else if (S_ISDIR(fst.st_mode)) + walkdir(subpath, action, false); } if (errno) - { fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); - } - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } + (void) closedir(dir); /* * It's important to fsync the destination directory itself as individual @@ -582,149 +594,111 @@ walkdir(char *path, void (*action) (char *fname, bool isdir)) } /* - * walktblspc_links: call walkdir on each entry under the given - * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist. - */ -static void -walktblspc_links(char *path, void (*action) (char *fname, bool isdir)) -{ - DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; - - dir = opendir(path); - if (dir == NULL) - { - if (errno == ENOENT) - return; - fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } - - while (errno = 0, (direntry = readdir(dir)) != NULL) - { - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) - continue; - - /* fsync the version specific tablespace subdirectory */ - snprintf(subpath, sizeof(subpath), "%s/%s/%s", - path, direntry->d_name, TABLESPACE_VERSION_DIRECTORY); - - walkdir(subpath, action); - } - - if (errno) - { - fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } - - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } -} - -/* * Hint to the OS that it should get ready to fsync() this file. + * + * Ignores errors trying to open unreadable files, and logs other errors + * at a caller-specified level. */ +#ifdef PG_FLUSH_DATA_WORKS + static void -pre_sync_fname(char *fname, bool isdir) +pre_sync_fname(const char *fname, bool isdir) { -#if defined(HAVE_SYNC_FILE_RANGE) || \ - (defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)) int fd; fd = open(fname, O_RDONLY | PG_BINARY); - /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) - */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; - if (fd < 0) { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } /* - * Prefer sync_file_range, else use posix_fadvise. We ignore any error - * here since this operation is only a hint anyway. + * We do what pg_flush_fd would do in the backend: prefer to use + * sync_file_range, but fall back to posix_fadvise, and ignore + * errors because this is only a hint. */ #if defined(HAVE_SYNC_FILE_RANGE) - sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); + (void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE); #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) - posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); + (void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED); +#else +#error PG_FLUSH_DATA_WORKS should not have been defined #endif - close(fd); -#endif + (void) close(fd); } +#endif /* PG_FLUSH_DATA_WORKS */ + /* - * fsync a file or directory + * fsync_fname_ext -- Try to fsync a file or directory * - * Try to fsync directories but ignore errors that indicate the OS - * just doesn't allow/require fsyncing directories. - * - * Adapted from fsync_fname() in copydir.c. + * Ignores errors trying to open unreadable files, or trying to fsync + * directories on systems where that isn't allowed/required. Reports + * other errors non-fatally. */ static void -fsync_fname(char *fname, bool isdir) +fsync_fname_ext(const char *fname, bool isdir) { int fd; + int flags; int returncode; /* * Some OSs require directories to be opened read-only whereas other * systems don't allow us to fsync files opened read-only; so we need both - * cases here + * cases here. Using O_RDWR will cause us to fail to fsync files that are + * not writable by our userid, but we assume that's OK. */ + flags = PG_BINARY; if (!isdir) - fd = open(fname, O_RDWR | PG_BINARY); + flags |= O_RDWR; else - fd = open(fname, O_RDONLY | PG_BINARY); + flags |= O_RDONLY; /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) + * Open the file, silently ignoring errors about unreadable files (or + * unsupported operations, e.g. opening a directory under Windows), and + * logging others. */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; - - else if (fd < 0) + fd = open(fname, flags); + if (fd < 0) { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } returncode = fsync(fd); - /* Some OSs don't allow us to fsync directories at all */ - if (returncode != 0 && isdir && errno == EBADF) - { - close(fd); - return; - } - - if (returncode != 0) - { + /* + * Some OSes don't allow us to fsync directories at all, so we can ignore + * those errors. Anything else needs to be reported. + */ + if (returncode != 0 && !(isdir && errno == EBADF)) fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); - } - close(fd); + (void) close(fd); } /* @@ -2419,50 +2393,73 @@ make_postgres(void) } /* - * fsync everything down to disk + * Issue fsync recursively on PGDATA and all its contents. + * + * We fsync regular files and directories wherever they are, but we + * follow symlinks only for pg_xlog and immediately under pg_tblspc. + * Other symlinks are presumed to point at files we're not responsible + * for fsyncing, and might not have privileges to write at all. + * + * Errors are reported but not considered fatal. */ static void -perform_fsync(void) +fsync_pgdata(void) { - char pdir[MAXPGPATH]; + bool xlog_is_symlink; + char pg_xlog[MAXPGPATH]; char pg_tblspc[MAXPGPATH]; fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - /* - * We need to name the parent of PGDATA. get_parent_directory() isn't - * enough here, because it can result in an empty string. - */ - snprintf(pdir, MAXPGPATH, "%s/..", pg_data); - canonicalize_path(pdir); + snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); /* - * Hint to the OS so that we're going to fsync each of these files soon. + * If pg_xlog is a symlink, we'll need to recurse into it separately, + * because the first walkdir below will ignore it. */ + xlog_is_symlink = false; - /* first the parent of the PGDATA directory */ - pre_sync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, pre_sync_fname); +#ifndef WIN32 + { + struct stat st; - /* now do the same thing for everything under pg_tblspc */ - snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); - walktblspc_links(pg_tblspc, pre_sync_fname); + if (lstat(pg_xlog, &st) < 0) + fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), + progname, pg_xlog, strerror(errno)); + else if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; + } +#else + if (pgwin32_is_junction(pg_xlog)) + xlog_is_symlink = true; +#endif /* - * Now, do the fsync()s in the same order. + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. */ +#ifdef PG_FLUSH_DATA_WORKS + walkdir(pg_data, pre_sync_fname, false); + if (xlog_is_symlink) + walkdir(pg_xlog, pre_sync_fname, false); + walkdir(pg_tblspc, pre_sync_fname, true); +#endif - /* first the parent of the PGDATA directory */ - fsync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, fsync_fname); - - /* and now the same for all tablespaces */ - walktblspc_links(pg_tblspc, fsync_fname); + /* + * Now we do the fsync()s in the same order. + * + * The main call ignores symlinks, so in addition to specially processing + * pg_xlog if it's a symlink, pg_tblspc has to be visited separately with + * process_symlinks = true. Note that if there are any plain directories + * in pg_tblspc, they'll get fsync'd twice. That's not an expected case + * so we don't worry about optimizing it. + */ + walkdir(pg_data, fsync_fname_ext, false); + if (xlog_is_symlink) + walkdir(pg_xlog, fsync_fname_ext, false); + walkdir(pg_tblspc, fsync_fname_ext, true); check_ok(); } @@ -3576,7 +3573,7 @@ main(int argc, char *argv[]) if (sync_only) { setup_pgdata(); - perform_fsync(); + fsync_pgdata(); return 0; } @@ -3629,7 +3626,7 @@ main(int argc, char *argv[]) initialize_data_directory(); if (do_sync) - perform_fsync(); + fsync_pgdata(); else printf(_("\nSync to disk skipped.\nThe data directory might become corrupt if the operating system crashes.\n")); -- 1.9.1
>From 1394a162e681af58a81c8ec420ff526a9a1b3a24 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen <a...@2ndquadrant.com> Date: Fri, 29 May 2015 08:45:04 +0530 Subject: Silently accept EACCES on opendir/stat --- src/backend/storage/file/fd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index b4f6590..af39744 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2579,9 +2579,10 @@ walkdir(const char *path, dir = AllocateDir(path); if (dir == NULL) { - ereport(elevel, - (errcode_for_file_access(), - errmsg("could not open directory \"%s\": %m", path))); + if (errno != EACCES) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", path))); return; } @@ -2606,9 +2607,10 @@ walkdir(const char *path, if (sret < 0) { - ereport(elevel, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", subpath))); + if (errno != EACCES) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", subpath))); continue; } -- 1.9.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers