At 2015-05-28 19:56:15 +0530, a...@2ndquadrant.com wrote: > > I have a separate patch to initdb with the corresponding changes, which > I will post after dinner and a bit more testing.
Here's that patch too. I was a bit unsure how far to go with this. It fixes the problem of not following pg_xlog if it's a symlink (Andres) and the one where it died on bogus stuff in pg_tblspc (Tom) and unreadable files anywhere else (it now spews errors to stderr, but carries on for as long as it can). I've done a bit more than strictly necessary to fix those problems, though, and made the code largely similar to what's in the other patch. If you want something minimal, let me know and I'll cut it down to size. -- Abhijit P.S. If this patch gets applied, then the "Adapted from walktblspc_links in initdb.c" comment in fd.c should be changed: s/links/entries/.
>From 8e69433fae0b4f51879793f6594c76b99d764818 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen <a...@2ndquadrant.com> Date: Thu, 28 May 2015 22:02:29 +0530 Subject: Make initdb -S behave like xlog.c:fsync_pgdata() In particular, it should silently skip unreadable/unexpected files in the data directory and follow symlinks only for pg_xlog and under pg_tblspc. --- src/bin/initdb/initdb.c | 235 +++++++++++++++++++++++++----------------------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f1c4920..8ebaa55 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -218,9 +218,9 @@ static char **filter_lines_with_token(char **lines, const char *token); static char **readfile(const char *path); static void writefile(char *path, char **lines); static void walkdir(char *path, void (*action) (char *fname, bool isdir)); -static void walktblspc_links(char *path, void (*action) (char *fname, bool isdir)); +static void walktblspc_entries(char *path, void (*action) (char *fname, bool isdir)); static void pre_sync_fname(char *fname, bool isdir); -static void fsync_fname(char *fname, bool isdir); +static void fsync_fname_ext(char *fname, bool isdir); static FILE *popen_check(const char *command, const char *mode); static void exit_nicely(void); static char *get_id(void); @@ -248,7 +248,7 @@ static void load_plpgsql(void); static void vacuum_db(void); static void make_template0(void); static void make_postgres(void); -static void perform_fsync(void); +static void fsync_pgdata(void); static void trapsig(int signum); static void check_ok(void); static char *escape_quotes(const char *src); @@ -518,38 +518,38 @@ writefile(char *path, char **lines) * walkdir: recursively walk a directory, applying the action to each * regular file and directory (including the named directory itself). * - * Adapted from copydir() in copydir.c. + * Adapted from walkdir() in backend/storage/file/fd.c. */ static void walkdir(char *path, void (*action) (char *fname, bool isdir)) { DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; + struct dirent *de; dir = opendir(path); if (dir == NULL) { fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); + return; } - while (errno = 0, (direntry = readdir(dir)) != NULL) + while (errno = 0, (de = readdir(dir)) != NULL) { + char subpath[MAXPGPATH]; struct stat fst; - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; - snprintf(subpath, MAXPGPATH, "%s/%s", path, direntry->d_name); + snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); if (lstat(subpath, &fst) < 0) { fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), progname, subpath, strerror(errno)); - exit_nicely(); + continue; } if (S_ISDIR(fst.st_mode)) @@ -559,75 +559,72 @@ walkdir(char *path, void (*action) (char *fname, bool isdir)) } if (errno) - { fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); - } - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } + (void) closedir(dir); - /* - * It's important to fsync the destination directory itself as individual - * file fsyncs don't guarantee that the directory entry for the file is - * synced. Recent versions of ext4 have made the window much wider but - * it's been an issue for ext3 and other filesystems in the past. - */ (*action) (path, true); } /* - * walktblspc_links: call walkdir on each entry under the given - * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist. + * walktblspc_entries -- apply the action to the entries in pb_tblspc + * + * We expect to find only symlinks to tablespace directories here, but + * we'll apply the action to regular files, and call walkdir() on any + * directories as well. + * + * Adapted from walktblspc_entries in backend/storage/file/fd.c */ static void -walktblspc_links(char *path, void (*action) (char *fname, bool isdir)) +walktblspc_entries(char *path, void (*action) (char *fname, bool isdir)) { DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; + struct dirent *de; dir = opendir(path); if (dir == NULL) { - if (errno == ENOENT) - return; fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); + return; } - while (errno = 0, (direntry = readdir(dir)) != NULL) + while (errno = 0, (de = readdir(dir)) != NULL) { - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) + char subpath[MAXPGPATH]; + struct stat fst; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; - /* fsync the version specific tablespace subdirectory */ - snprintf(subpath, sizeof(subpath), "%s/%s/%s", - path, direntry->d_name, TABLESPACE_VERSION_DIRECTORY); + snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); - walkdir(subpath, action); + /* + * We use stat rather than lstat here because we want to know if + * a link points to a directory or not. + */ + if (stat(subpath, &fst) < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, subpath, strerror(errno)); + continue; + } + + if (S_ISREG(fst.st_mode)) + (*action) (subpath, false); + else if (S_ISDIR(fst.st_mode)) + walkdir(subpath, action); } if (errno) - { fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); - } - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } + (void) closedir(dir); + + (*action) (path, true); } /* @@ -641,19 +638,19 @@ pre_sync_fname(char *fname, bool isdir) int fd; fd = open(fname, O_RDONLY | PG_BINARY); - - /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) - */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; - if (fd < 0) { - fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + + fprintf(stdout, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } /* @@ -676,12 +673,14 @@ pre_sync_fname(char *fname, bool isdir) * Try to fsync directories but ignore errors that indicate the OS * just doesn't allow/require fsyncing directories. * - * Adapted from fsync_fname() in copydir.c. + * Adapted from fsync_fname_ext() in backend/storage/file/fd.c (in + * particular, it silently ignores errors opening unreadable files). */ static void -fsync_fname(char *fname, bool isdir) +fsync_fname_ext(char *fname, bool isdir) { int fd; + int flags; int returncode; /* @@ -689,40 +688,33 @@ fsync_fname(char *fname, bool isdir) * systems don't allow us to fsync files opened read-only; so we need both * cases here */ + flags = PG_BINARY; if (!isdir) - fd = open(fname, O_RDWR | PG_BINARY); + flags |= O_RDWR; else - fd = open(fname, O_RDONLY | PG_BINARY); - - /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) - */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; + flags |= O_RDONLY; - else if (fd < 0) + fd = open(fname, flags); + if (fd < 0) { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } returncode = fsync(fd); - /* Some OSs don't allow us to fsync directories at all */ - if (returncode != 0 && isdir && errno == EBADF) - { - close(fd); - return; - } - - if (returncode != 0) - { + if (returncode != 0 && !(isdir && errno == EBADF)) fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); - } close(fd); } @@ -2419,50 +2411,67 @@ make_postgres(void) } /* - * fsync everything down to disk + * fsync the data directory and its contents to disk + * + * Ignore unreadable files, and follow symlinks only for pg_xlog and + * under pg_tblspc. */ static void -perform_fsync(void) +fsync_pgdata(void) { - char pdir[MAXPGPATH]; + bool xlog_is_symlink; + char pg_xlog[MAXPGPATH]; char pg_tblspc[MAXPGPATH]; + struct stat st; fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - /* - * We need to name the parent of PGDATA. get_parent_directory() isn't - * enough here, because it can result in an empty string. - */ - snprintf(pdir, MAXPGPATH, "%s/..", pg_data); - canonicalize_path(pdir); + snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); /* - * Hint to the OS so that we're going to fsync each of these files soon. + * If pg_xlog is a symlink, then we need to recurse into it separately, + * because the first walkdir below will ignore it. */ + xlog_is_symlink = false; - /* first the parent of the PGDATA directory */ - pre_sync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, pre_sync_fname); + if (lstat(pg_xlog, &st) < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, pg_xlog, strerror(errno)); + exit_nicely(); + } - /* now do the same thing for everything under pg_tblspc */ - snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); - walktblspc_links(pg_tblspc, pre_sync_fname); +#ifndef WIN32 + if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; +#else + if (pgwin32_is_junction(subpath)) + xlog_is_symlink = true; +#endif /* - * Now, do the fsync()s in the same order. + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. */ + walkdir(pg_data, pre_sync_fname); + if (xlog_is_symlink) + walkdir(pg_xlog, pre_sync_fname); + walktblspc_entries(pg_tblspc, pre_sync_fname); - /* first the parent of the PGDATA directory */ - fsync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, fsync_fname); + /* + * Now we do the fsync()s in the same order. + * + * It's important to fsync the destination directory itself as individual + * file fsyncs don't guarantee that the directory entry for the file is + * synced. + */ - /* and now the same for all tablespaces */ - walktblspc_links(pg_tblspc, fsync_fname); + walkdir(pg_data, fsync_fname_ext); + if (xlog_is_symlink) + walkdir(pg_xlog, fsync_fname_ext); + walktblspc_entries(pg_tblspc, fsync_fname_ext); check_ok(); } @@ -3576,7 +3585,7 @@ main(int argc, char *argv[]) if (sync_only) { setup_pgdata(); - perform_fsync(); + fsync_pgdata(); return 0; } @@ -3629,7 +3638,7 @@ main(int argc, char *argv[]) initialize_data_directory(); if (do_sync) - perform_fsync(); + fsync_pgdata(); else printf(_("\nSync to disk skipped.\nThe data directory might become corrupt if the operating system crashes.\n")); -- 1.9.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers