On Tue, Mar 03, 2020 at 05:23:13PM -0300, Alvaro Herrera wrote:
> On 2020-Mar-03, Justin Pryzby wrote:
> 
> > But I don't think it makes sense to go through more implementation/review
> > cycles without some agreement from a larger group regarding the
> > desired/intended interface.  Should there be a column for "parent dir" ?  
> > Or a
> > column for "is_dir" ?  Should dirs be shown at all, or only files ?
> 
> IMO: is_dir should be there (and subdirs should be listed), but
> parent_dir should not appear.  Also, the "path" should show the complete
> pathname, including containing dirs, starting from whatever the "root"
> is for the operation.
> 
> So for the example in the initial email, it would look like
> 
> path                                  isdir
> pgsql_tmp11025.0.sharedfileset/               t
> pgsql_tmp11025.0.sharedfileset/0.0    f
> pgsql_tmp11025.0.sharedfileset/1.0    f
> 
> plus additional columns, same as pg_ls_waldir et al.
> 
> I'd rather not have the code assume that there's a single level of
> subdirs, or assuming that an entry in the subdir cannot itself be a dir;
> that might end up hiding files for no good reason.
> 

Thanks for your input, see attached.

I'm not sure if prefer the 0002 patch alone (which recurses into dirs all at
once during the initial call), or 0002+3+4, which incrementally reads the dirs
on each call (but requires keeping dirs opened).

> I don't understand what purpose is served by having pg_ls_waldir() hide
> directories.

We could talk about whether the other functions should show dirs, if it's worth
breaking their return type.  Or if they should show hidden or special files,
which doesn't require breaking the return.  But until then I am to leave the
behavior alone.

-- 
Justin
>From a5b9a03445d1c768662cafebd8ab3bd7a62890aa Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 27 Dec 2019 23:34:14 -0600
Subject: [PATCH v6 1/4] BUG: in errmsg

Note there's two changes here.
Should backpatch to v12, where pg_ls_tmpdir was added.
---
 src/backend/utils/adt/genfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 3741b87486..897b11a77d 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -590,7 +590,7 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 		if (stat(path, &attrib) < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not stat directory \"%s\": %m", dir)));
+					 errmsg("could not stat file \"%s\": %m", path)));
 
 		/* Ignore anything but regular files */
 		if (!S_ISREG(attrib.st_mode))
-- 
2.17.0

>From 2437276b9c7525981d4a70b804c81021b2f5fa1f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 14 Dec 2019 16:22:15 -0600
Subject: [PATCH v6 2/4] pg_ls_tmpdir to show directories

See also 9cd92d1a33699f86aa53d44ab04cc3eb50c18d11
---
 doc/src/sgml/func.sgml           |  13 +--
 src/backend/utils/adt/genfile.c  | 133 +++++++++++++++++++++++--------
 src/include/catalog/catversion.h |   2 +-
 src/include/catalog/pg_proc.dat  |   8 +-
 4 files changed, 114 insertions(+), 42 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 323366feb6..35abff16c9 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -21382,8 +21382,9 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
        </entry>
        <entry><type>setof record</type></entry>
        <entry>
-        List the name, size, and last modification time of files in the
-        temporary directory for <parameter>tablespace</parameter>.  If
+        For the temporary directory for <parameter>tablespace</parameter>,
+        list each file's name, size, last modification time, and boolean
+        indicating if it is a directory.  Directories are shown recursively.  If
         <parameter>tablespace</parameter> is not provided, the
         <literal>pg_default</literal> tablespace is used.  Access is granted
         to members of the <literal>pg_monitor</literal> role and may be
@@ -21479,9 +21480,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     <primary>pg_ls_tmpdir</primary>
    </indexterm>
    <para>
-    <function>pg_ls_tmpdir</function> returns the name, size, and last modified
-    time (mtime) of each file in the temporary file directory for the specified
-    <parameter>tablespace</parameter>.  If <parameter>tablespace</parameter> is
+    <function>pg_ls_tmpdir</function> lists each file in the temporary file
+    directory for the specified <parameter>tablespace</parameter>, along with
+    its size, last modified time (mtime) and boolean indicating if it is a
+    directory.  Directories are used for temporary files used by parallel
+    processes, and are shown recursively.  If <parameter>tablespace</parameter> is
     not provided, the <literal>pg_default</literal> tablespace is used.  By
     default only superusers and members of the <literal>pg_monitor</literal>
     role can use this function.  Access may be granted to others using
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 897b11a77d..c5148f547b 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -40,6 +40,11 @@ typedef struct
 	char	   *location;
 	DIR		   *dirdesc;
 	bool		include_dot_dirs;
+
+	/* Used in ls_dir_files: */
+	int			npaths;
+	char		**path;
+	struct stat	*stat;
 } directory_fctx;
 
 
@@ -522,12 +527,73 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 	return pg_ls_dir(fcinfo);
 }
 
-/* Generic function to return a directory listing of files */
+/*
+ * Populate fctx with list of pathnames and stat structues.  Generate a full
+ * list of the stat structures all at once - the alternative is to re-stat
+ * everything later on, which then requires somehow handling cases like the 2nd
+ * stat fails, or the 2nd stat returns a dir which didn't exist at the time we
+ * originally looked, or existed but wasn't a dir.  stat()ing everything all at
+ * once gets as close as we can to a consistent view of the filesystem, from
+ * which files might be removed or renamed or reordered.
+ */
+static void
+populate_paths(directory_fctx *fctx, bool recurse)
+{
+	struct dirent *de;
+	struct stat	attrib;
+	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	{
+		char		path[MAXPGPATH];
+
+		/* Skip hidden files */
+		if (de->d_name[0] == '.')
+			continue;
+
+		/* Get the file info */
+		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
+		if (stat(path, &attrib) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not stat file \"%s\": %m", path)));
+
+		/* Ignore anything but regular files, or dirs, if requested */
+		if (S_ISDIR(attrib.st_mode))
+		{
+			/* Save current dir while recursing */
+			directory_fctx oldfctx = *fctx;
+
+			fctx->location = path;
+			fctx->dirdesc = AllocateDir(path);
+
+			if (recurse)
+				populate_paths(fctx, recurse);
+
+			/* Restore previous dir */
+			FreeDir(fctx->dirdesc);
+			fctx->dirdesc = oldfctx.dirdesc;
+			fctx->location = oldfctx.location;
+		} else if (!S_ISREG(attrib.st_mode))
+			continue;
+
+		/* Reallocate path and stat if npaths is a power of two */
+		if (!(fctx->npaths & (fctx->npaths-1)))
+		{
+			int nalloc = fctx->npaths ? 2 * fctx->npaths : 1;
+			fctx->path = repalloc(fctx->path, nalloc*sizeof(*fctx->path));
+			fctx->stat = repalloc(fctx->stat, nalloc*sizeof(*fctx->stat));
+		}
+
+		fctx->path[fctx->npaths] = pstrdup(path);
+		fctx->stat[fctx->npaths] = attrib;
+		fctx->npaths++;
+	}
+}
+
+/* Generic function to return a directory listing of files (and optionally dirs) */
 static Datum
-pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
+pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool dir_ok)
 {
 	FuncCallContext *funcctx;
-	struct dirent *de;
 	directory_fctx *fctx;
 
 	if (SRF_IS_FIRSTCALL())
@@ -540,17 +606,24 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 
 		fctx = palloc(sizeof(directory_fctx));
 
-		tupdesc = CreateTemplateTupleDesc(3);
+		tupdesc = CreateTemplateTupleDesc(dir_ok ? 4:3);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
 						   TEXTOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
 						   INT8OID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
 						   TIMESTAMPTZOID, -1, 0);
+		if (dir_ok)
+			TupleDescInitEntry(tupdesc, (AttrNumber) 4, "isdir",
+						   BOOLOID, -1, 0);
+
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		fctx->location = pstrdup(dir);
 		fctx->dirdesc = AllocateDir(fctx->location);
+		fctx->npaths = 0;
+		fctx->path = palloc(sizeof(*fctx->path));
+		fctx->stat = palloc(sizeof(*fctx->stat));
 
 		if (!fctx->dirdesc)
 		{
@@ -566,6 +639,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 								fctx->location)));
 		}
 
+		populate_paths(fctx, dir_ok);
+		funcctx->max_calls = fctx->npaths;
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
 	}
@@ -573,38 +648,32 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 	funcctx = SRF_PERCALL_SETUP();
 	fctx = (directory_fctx *) funcctx->user_fctx;
 
-	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	/*
+	 * Having already stat()ed all files, each call now just returns the
+	 * nth element.
+	 */
+	if (funcctx->call_cntr < funcctx->max_calls)
 	{
-		Datum		values[3];
-		bool		nulls[3];
-		char		path[MAXPGPATH * 2];
-		struct stat attrib;
+		struct stat	*stat = &fctx->stat[funcctx->call_cntr];
+		Datum		values[4];
+		bool		nulls[4] = {0};
 		HeapTuple	tuple;
 
-		/* Skip hidden files */
-		if (de->d_name[0] == '.')
-			continue;
-
-		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
-		if (stat(path, &attrib) < 0)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not stat file \"%s\": %m", path)));
-
-		/* Ignore anything but regular files */
-		if (!S_ISREG(attrib.st_mode))
-			continue;
-
-		values[0] = CStringGetTextDatum(de->d_name);
-		values[1] = Int64GetDatum((int64) attrib.st_size);
-		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
-		memset(nulls, 0, sizeof(nulls));
+		values[0] = CStringGetTextDatum(fctx->path[funcctx->call_cntr]);
+		values[1] = Int64GetDatum((int64) stat->st_size);
+		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(stat->st_mtime));
+		if (dir_ok)
+			values[3] = BoolGetDatum(S_ISDIR(stat->st_mode));
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 
+	for (int i = 0; i < funcctx->max_calls; ++i)
+		pfree(fctx->path[i]);
+
+	pfree(fctx->path);
+	pfree(fctx->stat);
 	FreeDir(fctx->dirdesc);
 	SRF_RETURN_DONE(funcctx);
 }
@@ -613,14 +682,14 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 Datum
 pg_ls_logdir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, Log_directory, false);
+	return pg_ls_dir_files(fcinfo, Log_directory, false, false);
 }
 
 /* Function to return the list of files in the WAL directory */
 Datum
 pg_ls_waldir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, XLOGDIR, false);
+	return pg_ls_dir_files(fcinfo, XLOGDIR, false, false);
 }
 
 /*
@@ -638,7 +707,7 @@ pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
 						tblspc)));
 
 	TempTablespacePath(path, tblspc);
-	return pg_ls_dir_files(fcinfo, path, true);
+	return pg_ls_dir_files(fcinfo, path, true, true);
 }
 
 /*
@@ -667,5 +736,5 @@ pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
 Datum
 pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
 {
-	return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
+	return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true, false);
 }
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index d4fe84a037..6e1e2b961b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202003031
+#define CATALOG_VERSION_NO	202003032
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 07a86c7b7b..0dbce4ba09 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10739,13 +10739,13 @@
 { oid => '5029', descr => 'list files in the pgsql_tmp directory',
   proname => 'pg_ls_tmpdir', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,int8,timestamptz}', proargmodes => '{o,o,o}',
-  proargnames => '{name,size,modification}', prosrc => 'pg_ls_tmpdir_noargs' },
+  proallargtypes => '{text,int8,timestamptz,bool}', proargmodes => '{o,o,o,o}',
+  proargnames => '{name,size,modification,isdir}', prosrc => 'pg_ls_tmpdir_noargs' },
 { oid => '5030', descr => 'list files in the pgsql_tmp directory',
   proname => 'pg_ls_tmpdir', procost => '10', prorows => '20', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => 'oid',
-  proallargtypes => '{oid,text,int8,timestamptz}', proargmodes => '{i,o,o,o}',
-  proargnames => '{tablespace,name,size,modification}',
+  proallargtypes => '{oid,text,int8,timestamptz,bool}', proargmodes => '{i,o,o,o,o}',
+  proargnames => '{tablespace,name,size,modification,isdir}',
   prosrc => 'pg_ls_tmpdir_1arg' },
 
 # hash partitioning constraint function
-- 
2.17.0

>From 864fa87522c3c0a062c11adfc0421cba92ac6efd Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Wed, 4 Mar 2020 17:38:03 -0600
Subject: [PATCH v6 3/4] Convert data structure to use a list

---
 src/backend/utils/adt/genfile.c | 78 +++++++++++++++++----------------
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index c5148f547b..bc473e79ef 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -37,8 +37,9 @@
 
 typedef struct
 {
-	char	   *location;
-	DIR		   *dirdesc;
+	/* List of opened dirs */
+	List		*location;
+	List		*dirdesc;
 	bool		include_dot_dirs;
 
 	/* Used in ls_dir_files: */
@@ -474,10 +475,9 @@ pg_ls_dir(PG_FUNCTION_ARGS)
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
 		fctx = palloc(sizeof(directory_fctx));
-		fctx->location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
-
+		fctx->location = lappend(NIL, convert_and_check_filename(PG_GETARG_TEXT_PP(0)));
+		fctx->dirdesc = lappend(NIL, AllocateDir(linitial(fctx->location)));
 		fctx->include_dot_dirs = include_dot_dirs;
-		fctx->dirdesc = AllocateDir(fctx->location);
 
 		if (!fctx->dirdesc)
 		{
@@ -490,7 +490,7 @@ pg_ls_dir(PG_FUNCTION_ARGS)
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not open directory \"%s\": %m",
-								fctx->location)));
+								(char*)linitial(fctx->location))));
 		}
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
@@ -499,7 +499,7 @@ pg_ls_dir(PG_FUNCTION_ARGS)
 	funcctx = SRF_PERCALL_SETUP();
 	fctx = (directory_fctx *) funcctx->user_fctx;
 
-	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	while ((de = ReadDir(linitial(fctx->dirdesc), linitial(fctx->location))) != NULL)
 	{
 		if (!fctx->include_dot_dirs &&
 			(strcmp(de->d_name, ".") == 0 ||
@@ -509,7 +509,9 @@ pg_ls_dir(PG_FUNCTION_ARGS)
 		SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(de->d_name));
 	}
 
-	FreeDir(fctx->dirdesc);
+	FreeDir(linitial(fctx->dirdesc));
+	list_free(fctx->dirdesc);
+	list_free_deep(fctx->location);
 
 	SRF_RETURN_DONE(funcctx);
 }
@@ -528,20 +530,25 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 }
 
 /*
- * Populate fctx with list of pathnames and stat structues.  Generate a full
- * list of the stat structures all at once - the alternative is to re-stat
- * everything later on, which then requires somehow handling cases like the 2nd
- * stat fails, or the 2nd stat returns a dir which didn't exist at the time we
- * originally looked, or existed but wasn't a dir.  stat()ing everything all at
- * once gets as close as we can to a consistent view of the filesystem, from
- * which files might be removed or renamed or reordered.
+ * Populate fctx with list of pathnames and stat structues.
+ *
+ * Generate a full list of the stat structures all at once.  That gets as close
+ * as we can to a consistent view of the filesystem, from which files might be
+ * removed or renamed or reordered.
+ *
+ * recurse is the recursion level into a stack of dirs, or -1 to not decend
+ * into directories.
  */
 static void
-populate_paths(directory_fctx *fctx, bool recurse)
+populate_paths(directory_fctx *fctx, int recurse)
 {
 	struct dirent *de;
 	struct stat	attrib;
-	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+
+	DIR *dirdesc = llast(fctx->dirdesc);
+	char *location = llast(fctx->location);
+
+	while ((de = ReadDir(dirdesc, location)) != NULL)
 	{
 		char		path[MAXPGPATH];
 
@@ -550,28 +557,23 @@ populate_paths(directory_fctx *fctx, bool recurse)
 			continue;
 
 		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
+		snprintf(path, sizeof(path), "%s/%s", location, de->d_name);
 		if (stat(path, &attrib) < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", path)));
 
 		/* Ignore anything but regular files, or dirs, if requested */
-		if (S_ISDIR(attrib.st_mode))
+		if (recurse == -1)
+			; /* Do nothing, fall through */
+		else if (S_ISDIR(attrib.st_mode))
 		{
-			/* Save current dir while recursing */
-			directory_fctx oldfctx = *fctx;
-
-			fctx->location = path;
-			fctx->dirdesc = AllocateDir(path);
-
-			if (recurse)
-				populate_paths(fctx, recurse);
-
-			/* Restore previous dir */
-			FreeDir(fctx->dirdesc);
-			fctx->dirdesc = oldfctx.dirdesc;
-			fctx->location = oldfctx.location;
+			/* Reallocate location and dirdesc whenever recursing */
+			fctx->location = lappend(fctx->location, path);
+			fctx->dirdesc = lappend(fctx->dirdesc, AllocateDir(path));
+			populate_paths(fctx, 1+recurse);
+			FreeDir(llast(fctx->dirdesc));
+			list_delete_last(fctx->dirdesc);
 		} else if (!S_ISREG(attrib.st_mode))
 			continue;
 
@@ -619,8 +621,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
-		fctx->location = pstrdup(dir);
-		fctx->dirdesc = AllocateDir(fctx->location);
+		fctx->location = lappend(NIL, pstrdup(dir));
+		fctx->dirdesc = lappend(NIL, AllocateDir(dir));
 		fctx->npaths = 0;
 		fctx->path = palloc(sizeof(*fctx->path));
 		fctx->stat = palloc(sizeof(*fctx->stat));
@@ -636,10 +638,10 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not open directory \"%s\": %m",
-								fctx->location)));
+								dir)));
 		}
 
-		populate_paths(fctx, dir_ok);
+		populate_paths(fctx, dir_ok ? 0 : -1);
 		funcctx->max_calls = fctx->npaths;
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
@@ -674,7 +676,9 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 
 	pfree(fctx->path);
 	pfree(fctx->stat);
-	FreeDir(fctx->dirdesc);
+	FreeDir(linitial(fctx->dirdesc));
+	list_free(fctx->dirdesc);
+	list_free(fctx->location);
 	SRF_RETURN_DONE(funcctx);
 }
 
-- 
2.17.0

>From 1a0c463d317d0e63885e7442b53875e9a1fd64e3 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 5 Mar 2020 06:04:35 -0600
Subject: [PATCH v6 4/4] Read each dir incrementally during each SRF call..

This requires a stack and keeping state and opened dirs in the data structure.
It also requires squirreling around with memory contexts.
The SRF implementation seems to preclude use of recursion.
---
 src/backend/utils/adt/genfile.c | 107 ++++++++++++++------------------
 1 file changed, 47 insertions(+), 60 deletions(-)

diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index bc473e79ef..32d5b6f8b9 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -37,15 +37,15 @@
 
 typedef struct
 {
-	/* List of opened dirs */
+	/* Stack of opened dirs */
 	List		*location;
 	List		*dirdesc;
 	bool		include_dot_dirs;
 
 	/* Used in ls_dir_files: */
-	int			npaths;
-	char		**path;
-	struct stat	*stat;
+	bool		dir_ok;
+	char		*path;
+	struct stat	stat;
 } directory_fctx;
 
 
@@ -530,27 +530,37 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 }
 
 /*
- * Populate fctx with list of pathnames and stat structues.
- *
- * Generate a full list of the stat structures all at once.  That gets as close
- * as we can to a consistent view of the filesystem, from which files might be
- * removed or renamed or reordered.
- *
- * recurse is the recursion level into a stack of dirs, or -1 to not decend
- * into directories.
+ * Update fctx->path and stat with next filename.
+ * Directories are descended into.
+ * The current dir to read from is at fctx[fctx->depth].
  */
-static void
-populate_paths(directory_fctx *fctx, int recurse)
+static int
+populate_paths(directory_fctx *fctx, FuncCallContext *funcctx)
 {
 	struct dirent *de;
-	struct stat	attrib;
-
-	DIR *dirdesc = llast(fctx->dirdesc);
-	char *location = llast(fctx->location);
 
-	while ((de = ReadDir(dirdesc, location)) != NULL)
+	for (;;)
 	{
 		char		path[MAXPGPATH];
+		DIR			*dirdesc = llast(fctx->dirdesc);
+		char		*location = llast(fctx->location);
+
+		Assert(list_length(fctx->dirdesc) == list_length(fctx->location));
+
+		if ((de = ReadDir(dirdesc, location)) == NULL)
+		{
+			/*
+			 * Read to the end of the dir on the top of the stack, now move to
+			 * the next dir.
+			 */
+			if (list_length(fctx->dirdesc) == 1)
+				return 0;
+			FreeDir(llast(fctx->dirdesc));
+			fctx->dirdesc = list_delete_last(fctx->dirdesc);
+			pfree(location);
+			fctx->location = list_delete_last(fctx->location);
+			continue;
+		}
 
 		/* Skip hidden files */
 		if (de->d_name[0] == '.')
@@ -558,36 +568,26 @@ populate_paths(directory_fctx *fctx, int recurse)
 
 		/* Get the file info */
 		snprintf(path, sizeof(path), "%s/%s", location, de->d_name);
-		if (stat(path, &attrib) < 0)
+		if (stat(path, &fctx->stat) < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not stat file \"%s\": %m", path)));
 
 		/* Ignore anything but regular files, or dirs, if requested */
-		if (recurse == -1)
+		if (!fctx->dir_ok)
 			; /* Do nothing, fall through */
-		else if (S_ISDIR(attrib.st_mode))
+		else if (S_ISDIR(fctx->stat.st_mode))
 		{
-			/* Reallocate location and dirdesc whenever recursing */
-			fctx->location = lappend(fctx->location, path);
+			/* Reallocate location and dirdesc whenever descending */
+			MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+			fctx->location = lappend(fctx->location, pstrdup(path));
 			fctx->dirdesc = lappend(fctx->dirdesc, AllocateDir(path));
-			populate_paths(fctx, 1+recurse);
-			FreeDir(llast(fctx->dirdesc));
-			list_delete_last(fctx->dirdesc);
-		} else if (!S_ISREG(attrib.st_mode))
+			MemoryContextSwitchTo(oldcontext);
+		} else if (!S_ISREG(fctx->stat.st_mode))
 			continue;
 
-		/* Reallocate path and stat if npaths is a power of two */
-		if (!(fctx->npaths & (fctx->npaths-1)))
-		{
-			int nalloc = fctx->npaths ? 2 * fctx->npaths : 1;
-			fctx->path = repalloc(fctx->path, nalloc*sizeof(*fctx->path));
-			fctx->stat = repalloc(fctx->stat, nalloc*sizeof(*fctx->stat));
-		}
-
-		fctx->path[fctx->npaths] = pstrdup(path);
-		fctx->stat[fctx->npaths] = attrib;
-		fctx->npaths++;
+		fctx->path = pstrdup(path);
+		return 1;
 	}
 }
 
@@ -623,9 +623,7 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 
 		fctx->location = lappend(NIL, pstrdup(dir));
 		fctx->dirdesc = lappend(NIL, AllocateDir(dir));
-		fctx->npaths = 0;
-		fctx->path = palloc(sizeof(*fctx->path));
-		fctx->stat = palloc(sizeof(*fctx->stat));
+		fctx->dir_ok = dir_ok;
 
 		if (!fctx->dirdesc)
 		{
@@ -641,8 +639,6 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 								dir)));
 		}
 
-		populate_paths(fctx, dir_ok ? 0 : -1);
-		funcctx->max_calls = fctx->npaths;
 		funcctx->user_fctx = fctx;
 		MemoryContextSwitchTo(oldcontext);
 	}
@@ -650,35 +646,26 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok, bool
 	funcctx = SRF_PERCALL_SETUP();
 	fctx = (directory_fctx *) funcctx->user_fctx;
 
-	/*
-	 * Having already stat()ed all files, each call now just returns the
-	 * nth element.
-	 */
-	if (funcctx->call_cntr < funcctx->max_calls)
+	if (populate_paths(fctx, funcctx))
 	{
-		struct stat	*stat = &fctx->stat[funcctx->call_cntr];
 		Datum		values[4];
 		bool		nulls[4] = {0};
 		HeapTuple	tuple;
 
-		values[0] = CStringGetTextDatum(fctx->path[funcctx->call_cntr]);
-		values[1] = Int64GetDatum((int64) stat->st_size);
-		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(stat->st_mtime));
+		values[0] = CStringGetTextDatum(fctx->path);
+		values[1] = Int64GetDatum((int64) fctx->stat.st_size);
+		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fctx->stat.st_mtime));
 		if (dir_ok)
-			values[3] = BoolGetDatum(S_ISDIR(stat->st_mode));
+			values[3] = BoolGetDatum(S_ISDIR(fctx->stat.st_mode));
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		pfree(fctx->path);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
 
-	for (int i = 0; i < funcctx->max_calls; ++i)
-		pfree(fctx->path[i]);
-
-	pfree(fctx->path);
-	pfree(fctx->stat);
 	FreeDir(linitial(fctx->dirdesc));
 	list_free(fctx->dirdesc);
-	list_free(fctx->location);
+	list_free_deep(fctx->location);
 	SRF_RETURN_DONE(funcctx);
 }
 
-- 
2.17.0

Reply via email to