From: Jim Meyering <meyer...@redhat.com>

Before this change, traversing (via rm -rf, find, du, etc.) an N-entry
directory would require about 256*N bytes of memory.  Thus, it was easy
to construct a directory too large to be processed by any of those
tools.  With this change, the maximum memory utilization is now limited,
for the most common type of traversal.
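For context, here is a minimal caller sketch, not part of the patch, showing
the kind of traversal the new limit applies to: a NULL fts_compar function
together with FTS_CWDFD and without no-chdir mode.  It assumes the gnulib
fts_ API ("fts_.h") and its FTS_CWDFD extension; the count_entries name is
illustrative only.

#include "fts_.h"   /* gnulib's fts; declares FTS_CWDFD (illustrative use) */

/* Count entries under DIR using a traversal for which the batching applies:
   NULL comparator plus FTS_CWDFD, so at most FTS_MAX_READDIR_ENTRIES
   directory entries are held in memory at a time.  */
static int
count_entries (char const *dir)
{
  char *argv[2] = { (char *) dir, NULL };
  FTS *fts = fts_open (argv, FTS_CWDFD | FTS_PHYSICAL, NULL);
  if (!fts)
    return -1;

  int n = 0;
  FTSENT *e;
  while ((e = fts_read (fts)))
    if (e->fts_info != FTS_DP)   /* skip post-order directory visits */
      n++;

  fts_close (fts);
  return n;
}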
* lib/fts.c (FTS_MAX_READDIR_ENTRIES): Define.
(fts_read): When we've processed the final entry (i.e., when
->fts_link is NULL) and fts_dirp is non-NULL, call fts_build
using the parent entry to read any remaining entries.
Dispatch depending on what fts_build returns:
- NULL+stop, aka failure: stop
- NULL otherwise: move up in the dir hierarchy
- non-NULL: handle this new entry
(fts_build): Declare and use new local, continue_readdir.
Prepare to be called from fts_read, when the entries from a
partially-read directory have just been exhausted.  In that case,
we'll skip the opendir and instead use the parent's fts_dirp and
derive dir_fd from that.  Finally, in the readdir loop, if we read
max_entries entries, exit the loop ensuring *not* to call closedir.
This is required so that fts_dirp can be reused on a subsequent call.
Prompted by Ben England's report of memory exhaustion in find and
rm -rf vs. NFS: https://bugzilla.redhat.com/719749.
---
 lib/fts.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/lib/fts.c b/lib/fts.c
index 48919f7..26cd444 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -134,12 +134,21 @@ enum
 # define D_INO(dp) NOT_AN_INODE_NUMBER
 #endif
 
+/* If possible (see max_entries, below), read no more than this many directory
+   entries at a time.  Without this limit (i.e., when using non-NULL
+   fts_compar), processing a directory with 4,000,000 entries requires ~1GiB
+   of memory, and handling 64M entries would require 16GiB of memory.  */
+#ifndef FTS_MAX_READDIR_ENTRIES
+# define FTS_MAX_READDIR_ENTRIES 100000
+#endif
+
 /* If there are more than this many entries in a directory,
    and the conditions mentioned below are satisfied, then sort
    the entries on inode number before any further processing.  */
 #ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
 # define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
 #endif
+
 enum
 {
   _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
@@ -908,6 +917,27 @@ fts_read (register FTS *sp)
 
         /* Move to the next node on this level. */
 next:   tmp = p;
+
+        /* If we have so many directory entries that we're reading them
+           in batches, and we've reached the end of the current batch,
+           read in a new batch.  */
+        if (p->fts_link == NULL && p->fts_parent->fts_dirp)
+          {
+            p = tmp->fts_parent;
+            sp->fts_cur = p;
+            sp->fts_path[p->fts_pathlen] = '\0';
+
+            if ((p = fts_build (sp, BREAD)) == NULL)
+              {
+                if (ISSET(FTS_STOP))
+                  return NULL;
+                goto cd_dot_dot;
+              }
+
+            free(tmp);
+            goto name;
+          }
+
         if ((p = p->fts_link) != NULL) {
                 sp->fts_cur = p;
                 free(tmp);
@@ -996,6 +1026,7 @@ check_for_dir:
                   }
                 return p;
         }
+cd_dot_dot:
 
         /* Move up to the parent node. */
         p = tmp->fts_parent;
@@ -1243,40 +1274,80 @@ fts_build (register FTS *sp, int type)
         char *cp;
         int dir_fd;
         FTSENT *cur = sp->fts_cur;
+        bool continue_readdir = !!cur->fts_dirp;
 
-        /* Open the directory for reading.  If this fails, we're done.
-           If being called from fts_read, set the fts_info field. */
-        if ((cur->fts_dirp = __opendir2(cur->fts_accpath, &dir_fd)) == NULL) {
-                if (type == BREAD) {
-                        cur->fts_info = FTS_DNR;
-                        cur->fts_errno = errno;
-                }
-                return (NULL);
-        }
-        /* Rather than calling fts_stat for each and every entry encountered
-           in the readdir loop (below), stat each directory only right after
-           opening it. */
-        if (cur->fts_info == FTS_NSOK)
-          cur->fts_info = fts_stat(sp, cur, false);
-        else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+        /* When cur->fts_dirp is non-NULL, that means we should
+           continue calling readdir on that existing DIR* pointer
+           rather than opening a new one.  */
+        if (continue_readdir)
+          {
+            DIR *dp = cur->fts_dirp;
+            dir_fd = dirfd (dp);
+            if (dir_fd < 0)
+              {
+                closedir_and_clear (cur->fts_dirp);
+                if (type == BREAD)
+                  {
+                    cur->fts_info = FTS_DNR;
+                    cur->fts_errno = errno;
+                  }
+                return NULL;
+              }
+          }
+        else
          {
-           /* Now read the stat info again after opening a directory to
-              reveal eventual changes caused by a submount triggered by
-              the traversal.  But do it only for utilities which use
-              FTS_TIGHT_CYCLE_CHECK.  Therefore, only find and du
-              benefit/suffer from this feature for now. */
-           LEAVE_DIR (sp, cur, "4");
-           fts_stat (sp, cur, false);
-           if (! enter_dir (sp, cur))
+           /* Open the directory for reading.  If this fails, we're done.
+              If being called from fts_read, set the fts_info field. */
+           if ((cur->fts_dirp = __opendir2(cur->fts_accpath, &dir_fd)) == NULL)
             {
-              __set_errno (ENOMEM);
+              if (type == BREAD)
+                {
+                  cur->fts_info = FTS_DNR;
+                  cur->fts_errno = errno;
+                }
              return NULL;
            }
+           /* Rather than calling fts_stat for each and every entry encountered
+              in the readdir loop (below), stat each directory only right after
+              opening it. */
+           if (cur->fts_info == FTS_NSOK)
+             cur->fts_info = fts_stat(sp, cur, false);
+           else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+             {
+               /* Now read the stat info again after opening a directory to
+                  reveal eventual changes caused by a submount triggered by
+                  the traversal.  But do it only for utilities which use
+                  FTS_TIGHT_CYCLE_CHECK.  Therefore, only find and du
+                  benefit/suffer from this feature for now. */
+               LEAVE_DIR (sp, cur, "4");
+               fts_stat (sp, cur, false);
+               if (! enter_dir (sp, cur))
+                 {
+                   __set_errno (ENOMEM);
+                   return NULL;
+                 }
+             }
          }
 
-        /* Nlinks is the number of possible entries of type directory in the
-           directory if we're cheating on stat calls, 0 if we're not doing
-           any stat calls at all, (nlink_t) -1 if we're statting everything. */
+        /* Maximum number of readdir entries to read at one time.  This
+           limitation is to avoid reading millions of entries into memory
+           at once.  However, when an fts_compar function is specified, we
+           have no choice: we must read all entries into memory before
+           calling that function.  But when no such function is specified,
+           we can read entries in batches that are large enough to help us
+           with inode-sorting, yet not so large that we risk exhausting
+           memory.  The other conditionals ensure that we are using the
+           *at functions (FTS_CWDFD) and that we are not in no-chdir mode
+           (induced by use of FTS_LOGICAL).  */
+        size_t max_entries =
+          (sp->fts_compar == NULL && ISSET (FTS_CWDFD) && ! ISSET (FTS_NOCHDIR)
+           ? FTS_MAX_READDIR_ENTRIES : SIZE_MAX);
+
+        /*
+         * Nlinks is the number of possible entries of type directory in the
+         * directory if we're cheating on stat calls, 0 if we're not doing
+         * any stat calls at all, (nlink_t) -1 if we're statting everything.
+         */
         if (type == BNAMES) {
                 nlinks = 0;
                 /* Be quiet about nostat, GCC. */
@@ -1305,7 +1376,13 @@ fts_build (register FTS *sp, int type)
          * needed sorted entries or stat information, they had better be
          * checking FTS_NS on the returned nodes.
          */
-        if (nlinks || type == BREAD) {
+        if (continue_readdir)
+          {
+            /* When resuming a short readdir run, we already have
+               the required dirp and dir_fd.  */
+            descend = true;
+          }
+        else if (nlinks || type == BREAD) {
                 if (ISSET(FTS_CWDFD))
                   {
                     dir_fd = dup (dir_fd);
@@ -1467,10 +1544,19 @@ mem1: saved_errno = errno;
                         tail = p;
                 }
                 ++nitems;
+                if (max_entries <= nitems) {
+                        /* When there are too many dir entries, leave
+                           fts_dirp open, so that a subsequent fts_read
+                           can take up where we leave off.  */
+                        goto break_without_closedir;
+                }
         }
+
        if (cur->fts_dirp)
                closedir_and_clear(cur->fts_dirp);
+
+ break_without_closedir:
+
         /*
          * If realloc() changed the address of the file name, adjust the
          * addresses for the rest of the tree and the dir list.
@@ -1495,7 +1581,7 @@ mem1: saved_errno = errno;
          * to an empty directory, we wind up here with no other way back.  If
          * can't get back, we're done.
          */
-        if (descend && (type == BCHILD || !nitems) &&
+        if (!continue_readdir && descend && (type == BCHILD || !nitems) &&
             (cur->fts_level == FTS_ROOTLEVEL
              ? RESTORE_INITIAL_CWD(sp)
              : fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
-- 
1.7.6.857.gf34cf
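To make the batching idea concrete, here is a hypothetical standalone sketch,
separate from the patch, of the pattern fts_build now follows: read at most a
fixed number of entries per call and keep the DIR stream open between calls,
so a later call can resume where the previous one stopped (the role played by
leaving fts_dirp open and jumping to break_without_closedir).  The BATCH
constant and read_batch helper are illustrative only.

#include <dirent.h>
#include <stddef.h>
#include <stdio.h>

enum { BATCH = 100000 };   /* mirrors the default FTS_MAX_READDIR_ENTRIES */

/* Read up to BATCH names from DP and print them.  Return the number read;
   0 means the stream is exhausted and the caller may close it.  The stream
   is deliberately left open between calls, just as the patched fts_build
   leaves fts_dirp open when a batch fills up.  */
static size_t
read_batch (DIR *dp)
{
  size_t n = 0;
  struct dirent *de;
  while (n < BATCH && (de = readdir (dp)) != NULL)
    {
      puts (de->d_name);
      n++;
    }
  return n;
}

int
main (int argc, char **argv)
{
  DIR *dp = opendir (argc > 1 ? argv[1] : ".");
  if (!dp)
    return 1;
  /* Process one bounded batch at a time; a real caller (like fts_read)
     would hand each batch to the traversal before asking for more.  */
  while (read_batch (dp) != 0)
    continue;
  closedir (dp);
  return 0;
}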