On Fri, Sep 4, 2020 at 3:31 AM Tom Lane <t...@sss.pgh.pa.us> wrote:
> Thomas Munro <thomas.mu...@gmail.com> writes:
> > Hmm.  Well I had it like that in an earlier version, but then I
> > couldn't figure out the right way to write code that would work in
> > both frontend and backend code, without writing two copies in two
> > translation units, or putting the whole thing in a header.  What
> > approach do you prefer?
>
> Well, there are plenty of places in src/port/ where we do things like
>
> #ifndef FRONTEND
>         ereport(ERROR,
>                 (errcode_for_file_access(),
>                  errmsg("could not get junction for \"%s\": %s",
>                         path, msg)));
> #else
>         fprintf(stderr, _("could not get junction for \"%s\": %s\n"),
>                 path, msg);
> #endif
>
> I don't see a compelling reason why this function couldn't report
> stat() failures similarly, especially if we're just going to have
> the callers do exactly the same thing as that anyway.

Ok, so the main weird thing is that you finish up having to pass in an
elevel, but that has different meanings in FE and BE code.  Note that
you still need a PGFILE_ERROR return value, because we don't log
messages at a level that exits non-locally (and that concept doesn't
even exist for FE logging).
From 6f937ccef54afdcebaa52a036591523c67ac9d41 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 3 Sep 2020 13:58:17 +1200
Subject: [PATCH v3 1/3] Skip unnecessary stat() calls in walkdir().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some kernels can tell us the type of a "dirent", so we can avoid a call
to stat() or lstat() in many cases.  In order to be able to apply this
change to both frontend and backend versions of walkdir(), define a new
function get_dirent_type() in a new translation unit file_utils_febe.c.

Reviewed-by: Tom Lane <t...@sss.pgh.pa.us>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santama...@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com
---
 src/backend/storage/file/fd.c    | 33 ++++++-----
 src/common/Makefile              |  1 +
 src/common/file_utils.c          | 32 +++++------
 src/common/file_utils_febe.c     | 99 ++++++++++++++++++++++++++++++++
 src/include/common/file_utils.h  | 22 ++++++-
 src/tools/msvc/Mkvcbuild.pm      |  2 +-
 src/tools/pgindent/typedefs.list |  1 +
 7 files changed, 154 insertions(+), 36 deletions(-)
 create mode 100644 src/common/file_utils_febe.c

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f376a97ed6..bd72a87ee3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -89,6 +89,7 @@
 #include "access/xlog.h"
 #include "catalog/pg_tablespace.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "portability/mem.h"
@@ -3340,8 +3341,6 @@ walkdir(const char *path,
 	while ((de = ReadDirExtended(dir, path, elevel)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -3351,23 +3350,23 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks, elevel))
 		{
-			ereport(elevel,
-					(errcode_for_file_access(),
-					 errmsg("could not stat file \"%s\": %m", subpath)));
-			continue;
-		}
+			case PGFILETYPE_REG:
+				(*action) (subpath, false, elevel);
+				break;
+			case PGFILETYPE_DIR:
+				walkdir(subpath, action, false, elevel);
+				break;
+			default:
 
-		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false, elevel);
-		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action, false, elevel);
+				/*
+				 * Errors are already reported directly by get_dirent_type(),
+				 * and any remaining symlinks and unknown file types are
+				 * ignored.
+				 */
+				break;
+		}
 	}
 
 	FreeDir(dir);				/* we ignore any error here */
diff --git a/src/common/Makefile b/src/common/Makefile
index 16619e4ba8..aac92aabe1 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -56,6 +56,7 @@ OBJS_COMMON = \
 	exec.o \
 	f2s.o \
 	file_perm.o \
+	file_utils_febe.o \
 	hashfn.o \
 	ip.o \
 	jsonapi.o \
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index a2faafdf13..e24f31dd3b 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -2,7 +2,7 @@
  *
  * File-processing utility routines.
  *
- * Assorted utility functions to work on files.
+ * Assorted utility functions to work on files, frontend only.
  *
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -167,8 +167,6 @@ walkdir(const char *path,
 	while (errno = 0, (de = readdir(dir)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		if (strcmp(de->d_name, ".") == 0 ||
 			strcmp(de->d_name, "..") == 0)
@@ -176,21 +174,23 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
 		{
-			pg_log_error("could not stat file \"%s\": %m", subpath);
-			continue;
+			case PGFILETYPE_REG:
+				(*action) (subpath, false);
+				break;
+			case PGFILETYPE_DIR:
+				walkdir(subpath, action, false);
+				break;
+			default:
+
+				/*
+				 * Errors are already reported directly by get_dirent_type(),
+				 * and any remaining symlinks and unknown file types are
+				 * ignored.
+				 */
+				break;
 		}
-
-		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false);
-		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action, false);
 	}
 
 	if (errno)
diff --git a/src/common/file_utils_febe.c b/src/common/file_utils_febe.c
new file mode 100644
index 0000000000..3c00ab7c4d
--- /dev/null
+++ b/src/common/file_utils_febe.c
@@ -0,0 +1,99 @@
+/*-------------------------------------------------------------------------
+ *
+ * File-processing utility routines.
+ *
+ * Assorted utility functions to work on files, frontend and backend.
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_utils_febe.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "common/file_utils.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#else
+#include "utils/elog.h"
+#endif
+
+/*
+ * Return the type of a directory entry.
+ *
+ * In frontend code, elevel should be a level from logging.h; in backend code
+ * it should be an error level from elog.h.
+ */
+PGFileType
+get_dirent_type(const char *path,
+				const struct dirent *de,
+				bool look_through_symlinks,
+				int elevel)
+{
+	PGFileType	result;
+
+	/*
+	 * We want to know the type of a directory entry.  Some systems tell us
+	 * that directly in the dirent struct, but that's a BSD/GNU extension.
+	 * Even when the interface is present, sometimes the type is unknown,
+	 * depending on the filesystem in use or in some cases options used at
+	 * filesystem creation time.
+	 */
+#if defined(DT_UNKNOWN) && defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
+	if (de->d_type == DT_REG)
+		result = PGFILETYPE_REG;
+	else if (de->d_type == DT_DIR)
+		result = PGFILETYPE_DIR;
+	else if (de->d_type == DT_LNK && !look_through_symlinks)
+		result = PGFILETYPE_LNK;
+	else
+		result = PGFILETYPE_UNKNOWN;
+#else
+	result = PGFILETYPE_UNKNOWN;
+#endif
+
+	if (result == PGFILETYPE_UNKNOWN)
+	{
+		struct stat fst;
+		int			sret;
+
+
+		if (look_through_symlinks)
+			sret = stat(path, &fst);
+		else
+			sret = lstat(path, &fst);
+
+		if (sret < 0)
+		{
+			result = PGFILETYPE_ERROR;
+#ifdef FRONTEND
+			pg_log_generic(elevel, "could not stat file \"%s\": %m", path);
+#else
+			ereport(elevel,
+					(errcode_for_file_access(),
+					 errmsg("could not stat file \"%s\": %m", path)));
+#endif
+		}
+		else if (S_ISREG(fst.st_mode))
+			result = PGFILETYPE_REG;
+		else if (S_ISDIR(fst.st_mode))
+			result = PGFILETYPE_DIR;
+#ifdef S_ISLNK
+		else if (S_ISLNK(fst.st_mode))
+			result = PGFILETYPE_LNK;
+#endif
+	}
+
+	return result;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index a7add75efa..16c7e7e249 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -1,6 +1,4 @@
 /*-------------------------------------------------------------------------
- *
- * File-processing utility routines for frontend code
  *
  * Assorted utility functions to work on files.
  *
@@ -15,10 +13,30 @@
 #ifndef FILE_UTILS_H
 #define FILE_UTILS_H
 
+#include <dirent.h>
+
+typedef enum PGFileType
+{
+	PGFILETYPE_ERROR,
+	PGFILETYPE_UNKNOWN,
+	PGFILETYPE_REG,
+	PGFILETYPE_DIR,
+	PGFILETYPE_LNK
+} PGFileType;
+
+/* Functions defined in file_utils_febe.c for both FE and BE code. */
+extern PGFileType get_dirent_type(const char *path,
+								  const struct dirent *de,
+								  bool look_through_symlinks,
+								  int elevel);
+
+/* Functions defined in file_utils.c only for FE code. */
+#ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
 extern void fsync_pgdata(const char *pg_data, int serverVersion);
 extern void fsync_dir_recurse(const char *dir);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
+#endif
 
 #endif							/* FILE_UTILS_H */
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 20da7985c1..a64127a196 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c hashfn.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils_febe.c hashfn.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5.c
 	  pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3d990463ce..b4d40dda16 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1514,6 +1514,7 @@ PGEventResultCopy
 PGEventResultCreate
 PGEventResultDestroy
 PGFInfoFunction
+PGFileType
 PGFunction
 PGLZ_HistEntry
 PGLZ_Strategy
-- 
2.20.1

From 616eba62a637b9b7440680d9ca4490d36347be46 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 3 Sep 2020 13:09:52 +1200
Subject: [PATCH v3 2/3] Add d_type to Win32 dirent port.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The member d_type is a BSD/GNU extension to struct dirent, but Windows
has the information required to populate it so let's add it to our
emulation so that get_dirent_type() can use it.

Author: Juan José Santamaría Flecha <juanjo.santama...@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com
---
 src/include/port/win32_msvc/dirent.h | 23 +++++++++++++++++++++++
 src/port/dirent.c                    | 15 +++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/src/include/port/win32_msvc/dirent.h b/src/include/port/win32_msvc/dirent.h
index 9fabdf332b..6a54f964f5 100644
--- a/src/include/port/win32_msvc/dirent.h
+++ b/src/include/port/win32_msvc/dirent.h
@@ -10,6 +10,7 @@ struct dirent
 {
 	long		d_ino;
 	unsigned short d_reclen;
+	unsigned char d_type;
 	unsigned short d_namlen;
 	char		d_name[MAX_PATH];
 };
@@ -20,4 +21,26 @@ DIR		   *opendir(const char *);
 struct dirent *readdir(DIR *);
 int			closedir(DIR *);
 
+/* File types for 'd_type'. */
+enum
+  {
+	DT_UNKNOWN = 0,
+# define DT_UNKNOWN		DT_UNKNOWN
+	DT_FIFO = 1,
+# define DT_FIFO		DT_FIFO
+	DT_CHR = 2,
+# define DT_CHR			DT_CHR
+	DT_DIR = 4,
+# define DT_DIR			DT_DIR
+	DT_BLK = 6,
+# define DT_BLK			DT_BLK
+	DT_REG = 8,
+# define DT_REG			DT_REG
+	DT_LNK = 10,
+# define DT_LNK			DT_LNK
+	DT_SOCK = 12,
+# define DT_SOCK		DT_SOCK
+	DT_WHT = 14
+# define DT_WHT			DT_WHT
+  };
 #endif
diff --git a/src/port/dirent.c b/src/port/dirent.c
index b264484fca..519dd82101 100644
--- a/src/port/dirent.c
+++ b/src/port/dirent.c
@@ -69,6 +69,7 @@ opendir(const char *dirname)
 	d->handle = INVALID_HANDLE_VALUE;
 	d->ret.d_ino = 0;			/* no inodes on win32 */
 	d->ret.d_reclen = 0;		/* not used on win32 */
+	d->ret.d_type = DT_UNKNOWN;
 
 	return d;
 }
@@ -77,6 +78,7 @@ struct dirent *
 readdir(DIR *d)
 {
 	WIN32_FIND_DATA fd;
+	DWORD			attrib;
 
 	if (d->handle == INVALID_HANDLE_VALUE)
 	{
@@ -105,6 +107,19 @@ readdir(DIR *d)
 	}
 	strcpy(d->ret.d_name, fd.cFileName);	/* Both strings are MAX_PATH long */
 	d->ret.d_namlen = strlen(d->ret.d_name);
+	/*
+	 * The only identifed types are: directory, regular file or symbolic link.
+	 * Errors are treated as a file type that could not be determined.
+	 */
+	attrib = GetFileAttributes(d->ret.d_name);
+	if (attrib == INVALID_FILE_ATTRIBUTES)
+		d->ret.d_type = DT_UNKNOWN;
+	else if ((attrib & FILE_ATTRIBUTE_DIRECTORY) != 0)
+		d->ret.d_type = DT_DIR;
+	else if ((attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
+		d->ret.d_type = DT_LNK;
+	else
+		d->ret.d_type = DT_REG;
 
 	return &d->ret;
 }
-- 
2.20.1

Reply via email to