Working on get_dirent_type() reminded me of this thread. I was going to commit the first of these patches, but then I noticed Andres said he was planning to, so I'll wait another day. Here they are, with commit messages but otherwise unchanged from Nathan's v12 except for a slight comment tweak:
- /* we're only handling directories here, skip if it's not ours */ + /* we're only handling directories here, skip if it's not one */ The only hunk I'm having second thoughts about is the following, which makes unexpected stray files break checkpoints: - * We just log a message if a file doesn't fit the pattern, it's - * probably some editors lock/state file or similar... */ if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2) - { - ereport(LOG, + ereport(ERROR, (errmsg("could not parse file name \"%s\"", path))); Bharath mentioned other places that loop over stat(), but I think those are places that want stuff we don't already have, like st_size.
From f0be5188830655b9ce9375b6d13b1ca728dbfcdd Mon Sep 17 00:00:00 2001 From: Thomas Munro <tmu...@postgresql.org> Date: Tue, 9 Aug 2022 13:11:08 +1200 Subject: [PATCH v13 1/2] Make more use of get_dirent_type(). We don't need to call stat() or lstat() in several ReadDir() loops, if d_type is available, as it usually is on most systems. If not, get_dirent_type() will do that under the covers. In a couple of places this will now raise an ERROR if an internal stat/lstat fails, where previously errors were silently ignored. ENOENT was not expected in these locations, because the loop in question is the one responsible for unlinking files during checkpoint, or runs at startup. Author: Nathan Bossart <nathandboss...@gmail.com> Reviewed-by: Thomas Munro <thomas.mu...@gmail.com> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostg...@gmail.com> Reviewed-by: Andres Freund <and...@anarazel.de> Discussion: https://postgr.es/m/20220215231123.GA2584239%40nathanxps13 --- src/backend/access/heap/rewriteheap.c | 4 +- .../replication/logical/reorderbuffer.c | 12 +++--- src/backend/replication/logical/snapbuild.c | 5 +-- src/backend/replication/slot.c | 4 +- src/backend/storage/file/copydir.c | 21 +++------- src/backend/storage/file/fd.c | 20 +++++---- src/backend/utils/misc/guc-file.l | 42 +++++++------------ src/timezone/pgtz.c | 8 +--- 8 files changed, 48 insertions(+), 68 deletions(-) diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 9dd885d936..525039d5b2 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -113,6 +113,7 @@ #include "access/xact.h" #include "access/xloginsert.h" #include "catalog/catalog.h" +#include "common/file_utils.h" #include "lib/ilist.h" #include "miscadmin.h" #include "pgstat.h" @@ -1213,7 +1214,6 @@ CheckPointLogicalRewriteHeap(void) mappings_dir = AllocateDir("pg_logical/mappings"); while ((mapping_de = ReadDir(mappings_dir, "pg_logical/mappings")) != NULL) { - struct stat statbuf; Oid dboid; Oid relid; XLogRecPtr lsn; @@ -1227,7 +1227,7 @@ CheckPointLogicalRewriteHeap(void) continue; snprintf(path, sizeof(path), "pg_logical/mappings/%s", mapping_de->d_name); - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + if (get_dirent_type(path, mapping_de, false, ERROR) != PGFILETYPE_REG) continue; /* Skip over files that cannot be ours. */ diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 88a37fde72..f2b6cdf473 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -91,6 +91,7 @@ #include "access/xact.h" #include "access/xlog_internal.h" #include "catalog/catalog.h" +#include "common/file_utils.h" #include "lib/binaryheap.h" #include "miscadmin.h" #include "pgstat.h" @@ -4407,15 +4408,10 @@ ReorderBufferCleanupSerializedTXNs(const char *slotname) { DIR *spill_dir; struct dirent *spill_de; - struct stat statbuf; char path[MAXPGPATH * 2 + 12]; sprintf(path, "pg_replslot/%s", slotname); - /* we're only handling directories here, skip if it's not ours */ - if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) - return; - spill_dir = AllocateDir(path); while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) { @@ -4463,6 +4459,7 @@ StartupReorderBuffer(void) { DIR *logical_dir; struct dirent *logical_de; + char path[MAXPGPATH * 2 + 12]; logical_dir = AllocateDir("pg_replslot"); while ((logical_de = ReadDir(logical_dir, "pg_replslot")) != NULL) @@ -4475,6 +4472,11 @@ StartupReorderBuffer(void) if (!ReplicationSlotValidateName(logical_de->d_name, DEBUG2)) continue; + /* we're only handling directories here, skip if it's not one */ + sprintf(path, "pg_replslot/%s", logical_de->d_name); + if (get_dirent_type(path, logical_de, false, ERROR) != PGFILETYPE_DIR) + continue; + /* * ok, has to be a surviving logical slot, iterate and delete * everything starting with xid-* diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 73c0f15214..8587e2de43 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -123,6 +123,7 @@ #include "access/heapam_xlog.h" #include "access/transam.h" #include "access/xact.h" +#include "common/file_utils.h" #include "miscadmin.h" #include "pgstat.h" #include "replication/logical.h" @@ -1946,15 +1947,13 @@ CheckPointSnapBuild(void) uint32 hi; uint32 lo; XLogRecPtr lsn; - struct stat statbuf; if (strcmp(snap_de->d_name, ".") == 0 || strcmp(snap_de->d_name, "..") == 0) continue; snprintf(path, sizeof(path), "pg_logical/snapshots/%s", snap_de->d_name); - - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + if (get_dirent_type(path, snap_de, false, ERROR) != PGFILETYPE_REG) { elog(DEBUG1, "only regular files expected: %s", path); continue; diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 850b74936f..bc51e816f9 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -41,6 +41,7 @@ #include "access/transam.h" #include "access/xlog_internal.h" +#include "common/file_utils.h" #include "common/string.h" #include "miscadmin.h" #include "pgstat.h" @@ -1442,7 +1443,6 @@ StartupReplicationSlots(void) replication_dir = AllocateDir("pg_replslot"); while ((replication_de = ReadDir(replication_dir, "pg_replslot")) != NULL) { - struct stat statbuf; char path[MAXPGPATH + 12]; if (strcmp(replication_de->d_name, ".") == 0 || @@ -1452,7 +1452,7 @@ StartupReplicationSlots(void) snprintf(path, sizeof(path), "pg_replslot/%s", replication_de->d_name); /* we're only creating directories here, skip if it's not our's */ - if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) + if (get_dirent_type(path, replication_de, false, ERROR) != PGFILETYPE_DIR) continue; /* we crashed while a slot was being setup or deleted, clean up */ diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c index 658fd95ba9..8a866191e1 100644 --- a/src/backend/storage/file/copydir.c +++ b/src/backend/storage/file/copydir.c @@ -22,6 +22,7 @@ #include <unistd.h> #include <sys/stat.h> +#include "common/file_utils.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/copydir.h" @@ -50,7 +51,7 @@ copydir(char *fromdir, char *todir, bool recurse) while ((xlde = ReadDir(xldir, fromdir)) != NULL) { - struct stat fst; + PGFileType xlde_type; /* If we got a cancel signal during the copy of the directory, quit */ CHECK_FOR_INTERRUPTS(); @@ -62,18 +63,15 @@ copydir(char *fromdir, char *todir, bool recurse) snprintf(fromfile, sizeof(fromfile), "%s/%s", fromdir, xlde->d_name); snprintf(tofile, sizeof(tofile), "%s/%s", todir, xlde->d_name); - if (lstat(fromfile, &fst) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", fromfile))); + xlde_type = get_dirent_type(fromfile, xlde, false, ERROR); - if (S_ISDIR(fst.st_mode)) + if (xlde_type == PGFILETYPE_DIR) { /* recurse to handle subdirectories */ if (recurse) copydir(fromfile, tofile, true); } - else if (S_ISREG(fst.st_mode)) + else if (xlde_type == PGFILETYPE_REG) copy_file(fromfile, tofile); } FreeDir(xldir); @@ -89,8 +87,6 @@ copydir(char *fromdir, char *todir, bool recurse) while ((xlde = ReadDir(xldir, todir)) != NULL) { - struct stat fst; - if (strcmp(xlde->d_name, ".") == 0 || strcmp(xlde->d_name, "..") == 0) continue; @@ -101,12 +97,7 @@ copydir(char *fromdir, char *todir, bool recurse) * We don't need to sync subdirectories here since the recursive * copydir will do it before it returns */ - if (lstat(tofile, &fst) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", tofile))); - - if (S_ISREG(fst.st_mode)) + if (get_dirent_type(tofile, xlde, false, ERROR) == PGFILETYPE_REG) fsync_fname(tofile, false); } FreeDir(xldir); diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index efb34d4dcb..64764a287d 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2708,6 +2708,10 @@ TryAgain: * * The pathname passed to AllocateDir must be passed to this routine too, * but it is only used for error reporting. + * + * If you need to discover the file type of an entry, consider using + * get_dirent_type() (instead of stat()/lstat()) to avoid extra system calls on + * many platforms. */ struct dirent * ReadDir(DIR *dir, const char *dirname) @@ -2723,6 +2727,10 @@ ReadDir(DIR *dir, const char *dirname) * If elevel < ERROR, returns NULL after any error. With the normal coding * pattern, this will result in falling out of the loop immediately as * though the directory contained no (more) entries. + * + * If you need to discover the file type of an entry, consider using + * get_dirent_type() (instead of stat()/lstat()) to avoid extra system calls on + * many platforms. */ struct dirent * ReadDirExtended(DIR *dir, const char *dirname, int elevel) @@ -3159,17 +3167,11 @@ RemovePgTempFilesInDir(const char *tmpdirname, bool missing_ok, bool unlink_all) PG_TEMP_FILE_PREFIX, strlen(PG_TEMP_FILE_PREFIX)) == 0) { - struct stat statbuf; + PGFileType type = get_dirent_type(rm_path, temp_de, false, LOG); - if (lstat(rm_path, &statbuf) < 0) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", rm_path))); + if (type == PGFILETYPE_ERROR) continue; - } - - if (S_ISDIR(statbuf.st_mode)) + else if (type == PGFILETYPE_DIR) { /* recursively remove contents, then directory itself */ RemovePgTempFilesInDir(rm_path, false, true); diff --git a/src/backend/utils/misc/guc-file.l b/src/backend/utils/misc/guc-file.l index ce5633844c..88460422dd 100644 --- a/src/backend/utils/misc/guc-file.l +++ b/src/backend/utils/misc/guc-file.l @@ -14,6 +14,7 @@ #include <ctype.h> #include <unistd.h> +#include "common/file_utils.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "storage/fd.h" @@ -1017,7 +1018,7 @@ ParseConfigDirectory(const char *includedir, while ((de = ReadDir(d, directory)) != NULL) { - struct stat st; + PGFileType de_type; char filename[MAXPGPATH]; /* @@ -1034,32 +1035,9 @@ ParseConfigDirectory(const char *includedir, join_path_components(filename, directory, de->d_name); canonicalize_path(filename); - if (stat(filename, &st) == 0) + de_type = get_dirent_type(filename, de, true, elevel); + if (de_type == PGFILETYPE_ERROR) { - if (!S_ISDIR(st.st_mode)) - { - /* Add file to array, increasing its size in blocks of 32 */ - if (num_filenames >= size_filenames) - { - size_filenames += 32; - filenames = (char **) repalloc(filenames, - size_filenames * sizeof(char *)); - } - filenames[num_filenames] = pstrdup(filename); - num_filenames++; - } - } - else - { - /* - * stat does not care about permissions, so the most likely reason - * a file can't be accessed now is if it was removed between the - * directory listing and now. - */ - ereport(elevel, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - filename))); record_config_file_error(psprintf("could not stat file \"%s\"", filename), calling_file, calling_lineno, @@ -1067,6 +1045,18 @@ ParseConfigDirectory(const char *includedir, status = false; goto cleanup; } + else if (de_type != PGFILETYPE_DIR) + { + /* Add file to array, increasing its size in blocks of 32 */ + if (num_filenames >= size_filenames) + { + size_filenames += 32; + filenames = (char **) repalloc(filenames, + size_filenames * sizeof(char *)); + } + filenames[num_filenames] = pstrdup(filename); + num_filenames++; + } } if (num_filenames > 0) diff --git a/src/timezone/pgtz.c b/src/timezone/pgtz.c index 3c278dd0f3..72f9e3d5e6 100644 --- a/src/timezone/pgtz.c +++ b/src/timezone/pgtz.c @@ -17,6 +17,7 @@ #include <sys/stat.h> #include <time.h> +#include "common/file_utils.h" #include "datatype/timestamp.h" #include "miscadmin.h" #include "pgtz.h" @@ -429,7 +430,6 @@ pg_tzenumerate_next(pg_tzenum *dir) { struct dirent *direntry; char fullname[MAXPGPATH * 2]; - struct stat statbuf; direntry = ReadDir(dir->dirdesc[dir->depth], dir->dirname[dir->depth]); @@ -447,12 +447,8 @@ pg_tzenumerate_next(pg_tzenum *dir) snprintf(fullname, sizeof(fullname), "%s/%s", dir->dirname[dir->depth], direntry->d_name); - if (stat(fullname, &statbuf) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat \"%s\": %m", fullname))); - if (S_ISDIR(statbuf.st_mode)) + if (get_dirent_type(fullname, direntry, true, ERROR) == PGFILETYPE_DIR) { /* Step into the subdirectory */ if (dir->depth >= MAX_TZDIR_DEPTH - 1) -- 2.35.1
From 3e60b8362eaa5fc50093c3fc59bc16773dda550a Mon Sep 17 00:00:00 2001 From: Thomas Munro <tmu...@postgresql.org> Date: Tue, 9 Aug 2022 14:42:10 +1200 Subject: [PATCH v13 2/2] Harden errors on some unexpected filesystem conditions. CheckPointSnapBuild() shouldn't fail to unlink() files. Raise an ERROR, there's something wrong with the filesystem. There is also no reason to tolerate random stray files under pg_logical/snapshots. ReorderBufferCleanupSerializedTXNs() shouldn't be called for a non-directory, so raise an ERROR if opendir() fails, there's something wrong with the filesystem. Author: Nathan Bossart <nathandboss...@gmail.com> Reviewed-by: Thomas Munro <thomas.mu...@gmail.com> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostg...@gmail.com> Reviewed-by: Andres Freund <and...@anarazel.de> Discussion: https://postgr.es/m/20220215231123.GA2584239%40nathanxps13 --- .../replication/logical/reorderbuffer.c | 2 +- src/backend/replication/logical/snapbuild.c | 18 ++---------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index f2b6cdf473..a80cbacd9b 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4413,7 +4413,7 @@ ReorderBufferCleanupSerializedTXNs(const char *slotname) sprintf(path, "pg_replslot/%s", slotname); spill_dir = AllocateDir(path); - while ((spill_de = ReadDirExtended(spill_dir, path, INFO)) != NULL) + while ((spill_de = ReadDir(spill_dir, path)) != NULL) { /* only look at names that can be ours */ if (strncmp(spill_de->d_name, "xid", 3) == 0) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 8587e2de43..bb788e1adc 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1964,16 +1964,10 @@ CheckPointSnapBuild(void) * everything but are postfixed by .$pid.tmp. We can just remove them * the same as other files because there can be none that are * currently being written that are older than cutoff. - * - * We just log a message if a file doesn't fit the pattern, it's - * probably some editors lock/state file or similar... */ if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2) - { - ereport(LOG, + ereport(ERROR, (errmsg("could not parse file name \"%s\"", path))); - continue; - } lsn = ((uint64) hi) << 32 | lo; @@ -1982,19 +1976,11 @@ CheckPointSnapBuild(void) { elog(DEBUG1, "removing snapbuild snapshot %s", path); - /* - * It's not particularly harmful, though strange, if we can't - * remove the file here. Don't prevent the checkpoint from - * completing, that'd be a cure worse than the disease. - */ if (unlink(path) < 0) - { - ereport(LOG, + ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", path))); - continue; - } } } FreeDir(snap_dir); -- 2.35.1