Thanks for the bug report archived at <https://lists.gnu.org/r/bug-gnulib/2020-04/msg00068.html>. What you appear to be saying is that the Gnulib fts NOSTAT_LEAF_OPTIMIZATION code is buggy when an XFS filesystem is mutating, and that your test case illustrates a mutating filesystem that can cause 'find' and 'du' to dump core because fts.c's tight-cycle check messes up its hash table.

I reproduced the bug on an XFS filesystem on Fedora 31 x86-64.

I worked around the bug by removing that optimization from Gnulib fts.c, by installing the attached patch. I expect that this optimization is obsolete nowadays, as XFS is now working and ReiserFS is either working, or is so unpopular that high performance for du and find is not that important for it.

I have the sneaking suspicion that we can now remove at least some of the special cases for AFS, CIFS, NFS, and /proc filesystems in the leaf_optimization subroutine. However, doing so would merely improve performance, so I resisted the temptation to make that change. I'll cc this message to other people who've touched the fts.c code to see whether they have any advice about that.

Thanks again for reporting the bug.
>From 6d6812a584ac16b21c1f06b89c28d74fe5931475 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 15 Apr 2020 20:50:32 -0700
Subject: [PATCH] fts: remove NOSTAT_LEAF_OPTIMIZATION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It caused ‘find’ and ‘du’ to dump core, and it was useful
only for obsolescent Linux filesystems anyway.  Problem reported in:
https://lists.gnu.org/r/bug-gnulib/2020-04/msg00068.html
Quite possibly there is still a serious underlying fts bug with
tight-loop-check and mutating file systems, but if so this patch
should cause the bug to be triggered less often.
* lib/fts.c (enum leaf_optimization): Remove
NOSTAT_LEAF_OPTIMIZATION, as it’s problematic.
(S_MAGIC_REISERFS, S_MAGIC_XFS): Remove; no longer needed.
(leaf_optimization): Remove special cases for ReiserFS and XFS.
(fts_read): Remove NOSTAT_LEAF_OPTIMIZATION code.
* lib/fts_.h (struct _ftsent.fts_n_dirs_remaining):
Remove.  All uses removed.
---
 ChangeLog  | 17 +++++++++++++++++
 lib/fts.c  | 56 ++++++++----------------------------------------------
 lib/fts_.h |  5 -----
 3 files changed, 25 insertions(+), 53 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 36f4270c3..cea6074bc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2020-04-15  Paul Eggert  <egg...@cs.ucla.edu>
+
+	fts: remove NOSTAT_LEAF_OPTIMIZATION
+	It caused ‘find’ and ‘du’ to dump core, and it was useful
+	only for obsolescent Linux filesystems anyway.  Problem reported in:
+	https://lists.gnu.org/r/bug-gnulib/2020-04/msg00068.html
+	Quite possibly there is still a serious underlying fts bug with
+	tight-loop-check and mutating file systems, but if so this patch
+	should cause the bug to be triggered less often.
+	* lib/fts.c (enum leaf_optimization): Remove
+	NOSTAT_LEAF_OPTIMIZATION, as it’s problematic.
+	(S_MAGIC_REISERFS, S_MAGIC_XFS): Remove; no longer needed.
+	(leaf_optimization): Remove special cases for ReiserFS and XFS.
+	(fts_read): Remove NOSTAT_LEAF_OPTIMIZATION code.
+	* lib/fts_.h (struct _ftsent.fts_n_dirs_remaining):
+	Remove.  All uses removed.
+
 2020-04-13  Bastien Roucariès  <ro...@debian.org>
 
 	explicit_bzero: Improve code style.
diff --git a/lib/fts.c b/lib/fts.c
index d3a0472a6..ade8c3349 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -445,7 +445,6 @@ fts_open (char * const *argv,
                 if ((parent = fts_alloc(sp, "", 0)) == NULL)
                         goto mem2;
                 parent->fts_level = FTS_ROOTPARENTLEVEL;
-                parent->fts_n_dirs_remaining = -1;
           }
 
         /* The classic fts implementation would call fts_stat with
@@ -634,9 +633,8 @@ fts_close (FTS *sp)
 }
 
 /* Minimum link count of a traditional Unix directory.  When leaf
-   optimization is OK and MIN_DIR_NLINK <= st_nlink, then st_nlink is
-   an upper bound on the number of subdirectories (counting "." and
-   "..").  */
+   optimization is OK and a directory's st_nlink == MIN_DIR_NLINK,
+   then the directory has no subdirectories.  */
 enum { MIN_DIR_NLINK = 2 };
 
 /* Whether leaf optimization is OK for a directory.  */
@@ -645,12 +643,8 @@ enum leaf_optimization
     /* st_nlink is not reliable for this directory's subdirectories.  */
     NO_LEAF_OPTIMIZATION,
 
-    /* Leaf optimization is OK, but is not useful for avoiding stat calls.  */
-    OK_LEAF_OPTIMIZATION,
-
-    /* Leaf optimization is not only OK: it is useful for avoiding
-       stat calls, because dirent.d_type does not work.  */
-    NOSTAT_LEAF_OPTIMIZATION
+    /* st_nlink == 2 means the directory lacks subdirectories.  */
+    OK_LEAF_OPTIMIZATION
   };
 
 #if (defined __linux__ || defined __ANDROID__) \
@@ -663,9 +657,7 @@ enum leaf_optimization
 # define S_MAGIC_CIFS 0xFF534D42
 # define S_MAGIC_NFS 0x6969
 # define S_MAGIC_PROC 0x9FA0
-# define S_MAGIC_REISERFS 0x52654973
 # define S_MAGIC_TMPFS 0x1021994
-# define S_MAGIC_XFS 0x58465342
 
 # ifdef HAVE___FSWORD_T
 typedef __fsword_t fsword;
@@ -782,23 +774,15 @@ dirent_inode_sort_may_be_useful (FTSENT const *p, int dir_fd)
 }
 
 /* Given an FTS entry P for a directory with descriptor DIR_FD,
-   return true if it is both useful and valid to apply leaf optimization.
-   The optimization is useful only for file systems that lack usable
-   dirent.d_type info.  The optimization is valid if an st_nlink value
-   of at least MIN_DIR_NLINK is an upper bound on the number of
-   subdirectories of D, counting "." and ".."  as subdirectories.
+   return whether it is valid to apply leaf optimization.
+   The optimization is valid if a directory's st_nlink value equal
+   to MIN_DIR_NLINK means the directory has no subdirectories.
    DIR_FD is negative if unavailable.  */
 static enum leaf_optimization
 leaf_optimization (FTSENT const *p, int dir_fd)
 {
   switch (filesystem_type (p, dir_fd))
     {
-      /* List here the file system types that may lack usable dirent.d_type
-         info, yet for which the optimization does apply.  */
-    case S_MAGIC_REISERFS:
-    case S_MAGIC_XFS: /* XFS lacked it until 2013-08-22 commit.  */
-      return NOSTAT_LEAF_OPTIMIZATION;
-
     case 0:
       /* Leaf optimization is unsafe if the file system type is unknown.  */
       FALLTHROUGH;
@@ -1023,26 +1007,7 @@ check_for_dir:
                 if (p->fts_info == FTS_NSOK)
                   {
                     if (p->fts_statp->st_size == FTS_STAT_REQUIRED)
-                      {
-                        FTSENT *parent = p->fts_parent;
-                        if (parent->fts_n_dirs_remaining == 0
-                            && ISSET(FTS_NOSTAT)
-                            && ISSET(FTS_PHYSICAL)
-                            && (leaf_optimization (parent, sp->fts_cwd_fd)
-                                == NOSTAT_LEAF_OPTIMIZATION))
-                          {
-                            /* nothing more needed */
-                          }
-                        else
-                          {
-                            p->fts_info = fts_stat(sp, p, false);
-                            if (S_ISDIR(p->fts_statp->st_mode)
-                                && p->fts_level != FTS_ROOTLEVEL
-                                && 0 < parent->fts_n_dirs_remaining
-                                && parent->fts_n_dirs_remaining != (nlink_t) -1)
-                                  parent->fts_n_dirs_remaining--;
-                          }
-                      }
+                      p->fts_info = fts_stat(sp, p, false);
                     else
                       fts_assert (p->fts_statp->st_size == FTS_NO_STAT_REQUIRED);
                   }
@@ -1826,11 +1791,6 @@ err:            memset(sbp, 0, sizeof(struct stat));
         }
 
         if (S_ISDIR(sbp->st_mode)) {
-                p->fts_n_dirs_remaining
-                  = ((sbp->st_nlink < MIN_DIR_NLINK
-                      || p->fts_level <= FTS_ROOTLEVEL)
-                     ? -1
-                     : sbp->st_nlink - (ISSET (FTS_SEEDOT) ? 0 : MIN_DIR_NLINK));
                 if (ISDOT(p->fts_name)) {
                         /* Command-line "." and ".." are real directories. */
                         return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT);
diff --git a/lib/fts_.h b/lib/fts_.h
index 6c7d0cef6..15c248cba 100644
--- a/lib/fts_.h
+++ b/lib/fts_.h
@@ -219,11 +219,6 @@ typedef struct _ftsent {
 
         size_t fts_namelen;             /* strlen(fts_name) */
 
-        /* If not (nlink_t) -1, an upper bound on the number of
-           remaining subdirectories of interest.  If this becomes
-           zero, some work can be avoided.  */
-        nlink_t fts_n_dirs_remaining;
-
 # define FTS_D           1              /* preorder directory */
 # define FTS_DC          2              /* directory that causes cycles */
 # define FTS_DEFAULT     3              /* none of the above */
-- 
2.25.1

Reply via email to