From: xiaofeidu <xiaofe...@meta.com> mu is a new tool which shows how much of the data of a set of files is currently held in the page cache. mu uses the cachestat() syscall introduced in Linux 6.5.
Example usage, we use mu over gcc libraries, before and after calling gcc for the first time after boot: $ src/mu --all --human-readable --threshold=1 /usr/lib/gcc/ $ gcc /dev/null -o /dev/zero /usr/bin/ld: /usr/lib64/crt1.o: in function `__wrap_main': (.text+0x38): undefined reference to `main' collect2: error: ld returned 1 exit status $ src/mu --all --human-readable --threshold=1 /usr/lib/gcc/ 4.0K /usr/lib/gcc/aarch64-redhat-linux/14/crtbegin.o 4.0K /usr/lib/gcc/aarch64-redhat-linux/14/crtend.o 128K /usr/lib/gcc/aarch64-redhat-linux/14/libgcc.a 4.0K /usr/lib/gcc/aarch64-redhat-linux/14/libgcc_s.so 140K /usr/lib/gcc/aarch64-redhat-linux/14 140K /usr/lib/gcc/aarch64-redhat-linux 140K /usr/lib/gcc/ Co-authored-by: Matteo Croce <teknora...@meta.com> --- AUTHORS | 1 + NEWS | 4 + README | 2 +- THANKS.in | 2 + build-aux/gen-lists-of-programs.sh | 1 + doc/coreutils.texi | 299 +++++++ man/.gitignore | 1 + man/local.mk | 1 + man/mu.x | 28 + scripts/git-hooks/commit-msg | 2 +- src/.gitignore | 1 + src/local.mk | 1 + src/mu.c | 1237 ++++++++++++++++++++++++++++ tests/mu/basic.sh | 65 ++ tests/mu/bigtime.sh | 52 ++ tests/mu/bind-mount-dir-cycle.sh | 38 + tests/mu/deref-args.sh | 38 + tests/mu/deref.sh | 38 + tests/mu/exclude.sh | 58 ++ tests/mu/fd-leak.sh | 39 + tests/mu/files0-from-dir.sh | 39 + tests/mu/format.sh | 64 ++ tests/mu/hard-link.sh | 61 ++ tests/mu/inacc-dir.sh | 43 + tests/mu/max-depth.sh | 39 + tests/mu/no-deref.sh | 33 + tests/mu/no-x.sh | 49 ++ tests/mu/one-file-system.sh | 51 ++ tests/mu/slash.sh | 33 + tests/mu/threshold.sh | 42 + tests/mu/two-args.sh | 40 + 31 files changed, 2400 insertions(+), 2 deletions(-) create mode 100644 man/mu.x create mode 100644 src/mu.c create mode 100755 tests/mu/basic.sh create mode 100755 tests/mu/bigtime.sh create mode 100755 tests/mu/bind-mount-dir-cycle.sh create mode 100755 tests/mu/deref-args.sh create mode 100755 tests/mu/deref.sh create mode 100755 tests/mu/exclude.sh create mode 100755 tests/mu/fd-leak.sh create 
mode 100755 tests/mu/files0-from-dir.sh create mode 100755 tests/mu/format.sh create mode 100755 tests/mu/hard-link.sh create mode 100755 tests/mu/inacc-dir.sh create mode 100755 tests/mu/max-depth.sh create mode 100755 tests/mu/no-deref.sh create mode 100755 tests/mu/no-x.sh create mode 100755 tests/mu/one-file-system.sh create mode 100755 tests/mu/slash.sh create mode 100755 tests/mu/threshold.sh create mode 100755 tests/mu/two-args.sh diff --git a/AUTHORS b/AUTHORS index d5c12aa97..13e9524cc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mkdir: David MacKenzie mkfifo: David MacKenzie mknod: David MacKenzie mktemp: Jim Meyering, Eric Blake +mu: Xiaofei Du mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie diff --git a/NEWS b/NEWS index c9ba5f196..26e8f02fc 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ GNU coreutils NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** New commands + + mu: a new tool to show memory usage of files + ** Bug fixes `ls -Z dir` would crash. 
diff --git a/README b/README index c0d549ff3..3e60de66e 100644 --- a/README +++ b/README @@ -11,7 +11,7 @@ The programs that can be built with this package are: chroot cksum comm coreutils cp csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp - mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx + mu mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx pwd readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty sum sync tac tail tee test timeout touch tr true truncate tsort tty uname unexpand diff --git a/THANKS.in b/THANKS.in index 57ace387e..06a7149e6 100644 --- a/THANKS.in +++ b/THANKS.in @@ -314,6 +314,7 @@ Joey Hess jo...@debian.org Johan Boule bo...@bohan.dyndns.org Johan Danielsson j...@pdc.kth.se Johannes Altmanninger aclo...@gmail.com +Johannes Weiner jwei...@meta.com John Bley j...@acpub.duke.edu John Gatewood Ham zappa...@alphabox.compsci.buu.ac.th John Gotts jgo...@umich.edu @@ -472,6 +473,7 @@ Nao Nishijima nao.nishijima...@hitachi.com Neal H Walfield n...@cs.uml.edu Neil F. Brown ne...@suse.de Nelson H. F. Beebe be...@math.utah.edu +Nhat Pham hoangnh...@meta.com Nick Estes deb...@nickstoys.com Nick Graham nick.d.gra...@gmail.com Nick Lawes nla...@silverplatter.com diff --git a/build-aux/gen-lists-of-programs.sh b/build-aux/gen-lists-of-programs.sh index 4b6af8a4f..4799026ab 100755 --- a/build-aux/gen-lists-of-programs.sh +++ b/build-aux/gen-lists-of-programs.sh @@ -85,6 +85,7 @@ normal_progs=' mkfifo mknod mktemp + mu mv nl nproc diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 45f4c8002..f25a71229 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -87,6 +87,7 @@ * mkfifo: (coreutils)mkfifo invocation. Create FIFOs (named pipes). * mknod: (coreutils)mknod invocation. 
Create special files. * mktemp: (coreutils)mktemp invocation. Create temporary files. +* mu: (coreutils)mu invocation. Report file memory usage. * mv: (coreutils)mv invocation. Rename files. * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation. Number lines and write files. @@ -200,6 +201,7 @@ Free Documentation License''. * Special file types:: mkdir rmdir unlink mkfifo mknod ln link readlink * Changing file attributes:: chgrp chmod chown touch * File space usage:: df du stat sync truncate +* File memory usage:: mu * Printing text:: echo printf yes * Conditions:: false true test expr * Redirection:: tee @@ -357,6 +359,10 @@ File space usage * sync invocation:: Synchronize cached writes to persistent storage * truncate invocation:: Shrink or extend the size of a file +File memory usage + +* mu invocation:: Report file memory usage + Printing text * echo invocation:: Print a line of text @@ -13293,6 +13299,299 @@ the size of each @var{file} based on its current size: @exitstatus +@node File memory usage +@chapter File memory usage + +@cindex File memory usage +@cindex memory usage + +These commands report how much memory files use + +@menu +* mu invocation:: Report file memory usage +@end menu + + +@node mu invocation +@section @command{mu}: Report file memory usage + +@pindex mu +@cindex file memory usage +@cindex memory usage for files + +@command{mu} reports the memory used for a set of files. +Synopsis: + +@example +mu [@var{option}]@dots{} [@var{file}]@dots{} +@end example + +With no arguments, @command{mu} reports the memory used by the +files at or under the current directory. +Normally the memory is printed in units of +1024 bytes, but this can be overridden (@pxref{Block size}). +Non-integer quantities are rounded up to the next higher unit. + +If two or more hard links point to the same file, only one of the hard +links is counted. 
The @var{file} argument order affects which links +are counted, and changing the argument order may change the numbers +and entries that @command{mu} outputs. + +The program accepts the following options. Also see @ref{Common options}. + +@table @samp + +@optNull + +@item -a +@itemx --all +@opindex -a +@opindex --all +Show counts for all files, not just directories. + +@item -B @var{size} +@itemx --block-size=@var{size} +@opindex -B +@opindex --block-size +@cindex memory sizes +Scale sizes by @var{size} before printing them (@pxref{Block size}). +For example, @option{-BG} prints sizes in units of 1,073,741,824 bytes. + +@item -b +@itemx --bytes +@opindex -b +@opindex --bytes +Equivalent to @code{--block-size=1}. + +@item -c +@itemx --total +@opindex -c +@opindex --total +@cindex grand total of file memory usage +Print a grand total of all arguments after all arguments have +been processed. This can be used to find out the total file memory usage of +a given set of files or directories. + +@item -D +@itemx --dereference-args +@opindex -D +@opindex --dereference-args +Dereference symbolic links that are command line arguments. +Does not affect other symbolic links. This is helpful for finding +out the file memory usage of directories, such as @file{/usr/tmp}, which +are often symbolic links. + +@item -d @var{depth} +@itemx --max-depth=@var{depth} +@opindex -d @var{depth} +@opindex --max-depth=@var{depth} +@cindex limiting output of @command{mu} +Show the total for each directory (and file if @option{--all}) that is at +most MAX_DEPTH levels down from the root of the hierarchy. The root +is at level 0, so @code{mu --max-depth=0} is equivalent to @code{mu -s}. 
+ +@c --files0-from=FILE +@filesZeroFromOption{mu,, with the @option{--total} (@option{-c}) option} + +@item -f +@itemx --format=@var{format} +@opindex -f +@opindex --format=@var{format} +@cindex use specified FORMAT for output instead of the default +FORMAT is automatically newline-terminated, so running a command like the +following with two or more FILE operands produces a line of output for +each operand: + +@example +$ mu -f '%c %d' file_a file_b +4 0 file_a +8 0 file_b +@end example + +The valid @var{format} directives for memory usage with @option{--format} are: + +@itemize @bullet +@item %c -- memory cached in the page cache +@item %d -- dirty memory (has been modified and not yet written back + to persistent storage) +@item %w -- memory currently being written back +@item %e -- memory that was once resident in the cache but has since been forced out +@item %r -- memory that has been forced out in the recent past. In this case, the + 'recent past' is defined by the memory that has been evicted since + the memory in question was forced out +@end itemize + +@item -H +@opindex -H +Equivalent to @option{--dereference-args} (@option{-D}). + +@optHumanReadable + +@item -k +@opindex -k +@cindex kibibytes for memory sizes +Print sizes in 1024-byte blocks, overriding the default block size +(@pxref{Block size}). +This option is equivalent to @option{--block-size=1K}. + +@item -L +@itemx --dereference +@opindex -L +@opindex --dereference +@cindex symbolic links, dereferencing in @command{mu} +Dereference symbolic links (show the file memory space used by the file +or directory that the link points to instead of the memory used by +the link). + +@item -l +@itemx --count-links +@opindex -l +@opindex --count-links +@cindex hard links, counting in @command{mu} +Count the size of all files, even if they have appeared already (as a +hard link).
+ +@item -m +@opindex -m +@cindex megabytes for memory sizes +Print sizes in 1,048,576-byte blocks, overriding the default block size +(@pxref{Block size}). +This option is equivalent to @option{--block-size=1M}. + +@item -P +@itemx --no-dereference +@opindex -P +@opindex --no-dereference +@cindex symbolic links, dereferencing in @command{mu} +For each symbolic link encountered by @command{mu}, +consider the file memory used by the symbolic link itself. + +@item -S +@itemx --separate-dirs +@opindex -S +@opindex --separate-dirs +Normally, in the output of @command{mu} (when not using @option{--summarize}), +the size listed next to a directory name, @var{d}, represents the sum +of sizes of all entries beneath @var{d} as well as the size of @var{d} itself. +With @option{--separate-dirs}, the size reported for a directory name, +@var{d}, will exclude the size of any subdirectories. + +@optSi + +@item -s +@itemx --summarize +@opindex -s +@opindex --summarize +Display only a total for each argument. + +@item -t @var{size} +@itemx --threshold=@var{size} +@opindex -t +@opindex --threshold +Exclude entries based on a given @var{size}. + +If @var{size} is positive, then @command{mu} will only print entries with a size +greater than or equal to that. + +If @var{size} is negative, then @command{mu} will only print entries with a size +smaller than or equal to that. + +@item --time +@opindex --time +@cindex last modified dates, displaying in @command{mu} +Show the most recent modification timestamp (mtime) of any file in the +directory, or any of its subdirectories. @xref{File timestamps}. + +@item --time=ctime +@itemx --time=status +@itemx --time=use +@opindex --time +@opindex ctime@r{, show the most recent} +@opindex status time@r{, show the most recent} +@opindex use time@r{, show the most recent} +Show the most recent status change timestamp (ctime) of any file in +the directory, or any of its subdirectories. @xref{File timestamps}. 
+ +@item --time=atime +@itemx --time=access +@opindex --time +@opindex atime@r{, show the most recent} +@opindex access timestamp@r{, show the most recent} +Show the most recent access timestamp (atime) of any file in the +directory, or any of its subdirectories. @xref{File timestamps}. + +@item --time-style=@var{style} +@opindex --time-style +@cindex time style +List timestamps in style @var{style}. This option has an effect only if +the @option{--time} option is also specified. The @var{style} should +be one of the following: + +@table @samp +@item +@var{format} +@vindex LC_TIME +List timestamps using @var{format}, where @var{format} is interpreted +like the format argument of @command{date} (@pxref{date invocation}). +For example, @option{--time-style="+%Y-%m-%d %H:%M:%S"} causes +@command{mu} to list timestamps like @samp{2020-07-21 23:45:56}. As +with @command{date}, @var{format}'s interpretation is affected by the +@env{LC_TIME} locale category. + +@item full-iso +List timestamps in full using ISO 8601-like date, time, and time zone +components with nanosecond precision, e.g., @samp{2020-07-21 +23:45:56.477817180 -0400}. This style is equivalent to +@samp{+%Y-%m-%d %H:%M:%S.%N %z}. + +@item long-iso +List ISO 8601 date and time components with minute precision, e.g., +@samp{2020-07-21 23:45}. These timestamps are shorter than +@samp{full-iso} timestamps, and are usually good enough for everyday +work. This style is equivalent to @samp{+%Y-%m-%d %H:%M}. + +@item iso +List ISO 8601 dates for timestamps, e.g., @samp{2020-07-21}. +This style is equivalent to @samp{+%Y-%m-%d}. +@end table + +@vindex TIME_STYLE +You can specify the default value of the @option{--time-style} option +with the environment variable @env{TIME_STYLE}; if @env{TIME_STYLE} is not set +the default style is @samp{long-iso}.
For compatibility with @command{ls}, +if @env{TIME_STYLE} begins with @samp{+} and contains a newline, +the newline and any later characters are ignored; if @env{TIME_STYLE} +begins with @samp{posix-} the @samp{posix-} is ignored; and if +@env{TIME_STYLE} is @samp{locale} it is ignored. + +@item -X @var{file} +@itemx --exclude-from=@var{file} +@opindex -X @var{file} +@opindex --exclude-from=@var{file} +@cindex excluding files from @command{mu} +Like @option{--exclude}, except take the patterns to exclude from @var{file}, +one per line. If @var{file} is @samp{-}, take the patterns from standard +input. + +@item --exclude=@var{pattern} +@opindex --exclude=@var{pattern} +@cindex excluding files from @command{mu} +When recursing, skip subdirectories or files matching @var{pattern}. +For example, @code{mu --exclude='*.o'} excludes files whose names +end in @samp{.o}. + +@item -x +@itemx --one-file-system +@opindex -x +@opindex --one-file-system +@cindex one file system, restricting @command{mu} to +Skip directories that are on different file systems from the one that +the argument being processed is on. + +@end table + +@exitstatus + + @node Printing text @chapter Printing text diff --git a/man/.gitignore b/man/.gitignore index 4eecb7833..89985a0f4 100644 --- a/man/.gitignore +++ b/man/.gitignore @@ -47,6 +47,7 @@ mkdir.1 mkfifo.1 mknod.1 mktemp.1 +mu.1 mv.1 nice.1 nl.1 diff --git a/man/local.mk b/man/local.mk index ae74bf6ce..641574938 100644 --- a/man/local.mk +++ b/man/local.mk @@ -116,6 +116,7 @@ man/mkdir.1: src/mkdir$(EXEEXT) man/mkfifo.1: src/mkfifo$(EXEEXT) man/mknod.1: src/mknod$(EXEEXT) man/mktemp.1: src/mktemp$(EXEEXT) +man/mu.1: src/mu$(EXEEXT) man/mv.1: src/mv$(EXEEXT) man/nice.1: src/nice$(EXEEXT) man/nl.1: src/nl$(EXEEXT) diff --git a/man/mu.x b/man/mu.x new file mode 100644 index 000000000..8e41bc7b8 --- /dev/null +++ b/man/mu.x @@ -0,0 +1,28 @@ +'\" Copyright (C) 2025 Free Software Foundation, Inc. +'\" +'\" This is free software. 
You may redistribute copies of it under the terms +'\" of the GNU General Public License <https://www.gnu.org/licenses/gpl.html>. +'\" There is NO WARRANTY, to the extent permitted by law. +[NAME] +mu \- estimate file memory usage +[DESCRIPTION] +.\" Add any additional description here +[PATTERNS] +PATTERN is a shell pattern (not a regular expression). The pattern +.B ?\& +matches any one character, whereas +.B * +matches any string (composed of zero, one or multiple characters). For +example, +.B *.o +will match any files whose names end in +.BR .o . +Therefore, the command +.IP +.B mu \-\-exclude=\(aq*.o\(aq +.PP +will skip all files and subdirectories ending in +.B .o +(including the file +.B .o +itself). diff --git a/scripts/git-hooks/commit-msg b/scripts/git-hooks/commit-msg index da094c95a..ea79a7e6e 100755 --- a/scripts/git-hooks/commit-msg +++ b/scripts/git-hooks/commit-msg @@ -18,7 +18,7 @@ my @valid = qw( chroot cksum comm cp csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp - mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf + mv mu nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf ptx pwd readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty sum sync tac tail tee test timeout touch tr true truncate tsort diff --git a/src/.gitignore b/src/.gitignore index 55f9660c6..36696d849 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -57,6 +57,7 @@ mkdir mkfifo mknod mktemp +mu mv nice nl diff --git a/src/local.mk b/src/local.mk index fd9dc81c2..3ba9b14d9 100644 --- a/src/local.mk +++ b/src/local.mk @@ -160,6 +160,7 @@ src_mkdir_LDADD = $(LDADD) src_mkfifo_LDADD = $(LDADD) src_mknod_LDADD = $(LDADD) src_mktemp_LDADD = $(LDADD) +src_mu_LDADD = $(LDADD) src_mv_LDADD = $(LDADD) src_nice_LDADD = $(LDADD) 
src_nl_LDADD = $(LDADD) diff --git a/src/mu.c b/src/mu.c new file mode 100644 index 000000000..97c7fdab1 --- /dev/null +++ b/src/mu.c @@ -0,0 +1,1237 @@ +/* mu -- summarize memory usage + Copyright (C) 2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" +#include "argmatch.h" +#include "argv-iter.h" +#include "assure.h" +#include "di-set.h" +#include "exclude.h" +#include "fprintftime.h" +#include "human.h" +#include "mountlist.h" +#include "quote.h" +#include "stat-size.h" +#include "stat-time.h" +#include "stdio--.h" +#include "xfts.h" +#include "xstrtol.h" +#include "xstrtol-error.h" + +#include <fcntl.h> +#include <linux/mman.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <unistd.h> + +extern bool fts_debug; + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mu" + +#define AUTHORS \ + proper_name ("Xiaofei Du") + +#if DU_DEBUG +# define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts) +#else +# define FTS_CROSS_CHECK(Fts) +#endif + +/* A set of dev/ino pairs to help identify files and directories + whose sizes have already been counted. */ +static struct di_set *di_files; + +/* A set containing a dev/ino pair for each local mount point directory. 
*/
+static struct di_set *di_mnt;
+
+/* Keep track of the preceding "level" (depth in hierarchy)
+   from one call of process_file to the next.  */
+static size_t prev_level;
+
+/* Page-cache counters for one entry, as derived from cachestat
+   results, together with the latest timestamp seen so far.  */
+struct muinfo
+{
+  uintmax_t cache_size;
+  uintmax_t dirty_size;
+  uintmax_t writeback_size;
+  uintmax_t evicted_size;
+  uintmax_t recently_evicted_size;
+  struct timespec tmax;
+};
+
+/* Invoke the cachestat system call through syscall, passing FD,
+   CSTAT_RANGE, CSTAT and FLAGS through unchanged, and return its
+   result.  */
+static int
+cachestat (int fd, struct cachestat_range *cstat_range,
+           struct cachestat *cstat, unsigned int flags)
+{
+  return syscall (__NR_cachestat, fd, cstat_range, cstat, flags);
+}
+
+/* Reset *MUI: every counter becomes zero and the timestamp becomes
+   the smallest representable one, so that any real timestamp
+   compares greater.  */
+static inline void
+muinfo_init (struct muinfo *mui)
+{
+  *mui = (struct muinfo)
+    {
+      .tmax = { .tv_sec = TYPE_MINIMUM (time_t), .tv_nsec = -1 }
+    };
+}
+
+/* Return A + B, or UINTMAX_MAX if the sum wraps around.  */
+static inline uintmax_t
+saturating_sum (uintmax_t a, uintmax_t b)
+{
+  uintmax_t total = a + b;
+  return total < a ? UINTMAX_MAX : total;
+}
+
+/* Accumulate *SECOND into *FIRST.  Each counter saturates at
+   UINTMAX_MAX instead of wrapping, and FIRST->tmax ends up as the
+   later of the two timestamps.  */
+static inline void
+muinfo_add (struct muinfo *first, const struct muinfo *second)
+{
+  first->cache_size = saturating_sum (first->cache_size, second->cache_size);
+  first->dirty_size = saturating_sum (first->dirty_size, second->dirty_size);
+  first->writeback_size = saturating_sum (first->writeback_size,
+                                          second->writeback_size);
+  first->evicted_size = saturating_sum (first->evicted_size,
+                                        second->evicted_size);
+  first->recently_evicted_size
+    = saturating_sum (first->recently_evicted_size,
+                      second->recently_evicted_size);
+
+  if (timespec_cmp (first->tmax, second->tmax) < 0)
+    first->tmax = second->tmax;
+}
+
+/* Per-depth accumulators used while walking a hierarchy: ENT for the
+   entries directly in the directory at that level, SUBDIR for
+   everything at or below it.  */
+struct mulevel
+{
+  struct muinfo ent;
+  struct muinfo subdir;
+};
+
+/* If true, display counts for all files, not just directories.  */
+static bool opt_all = false;
+
+/* If true, count each hard link of files with multiple links.  */
+static bool opt_count_all = false;
+
+/* If true, hash all files to look for hard links.
*/ +static bool hash_all; + +/* If true, output the NUL byte instead of a newline at the end of each line. */ +static bool opt_nul_terminate_output = false; + +/* If true, print a grand total at the end. */ +static bool print_grand_total = false; + +/* If nonzero, do not add sizes of subdirectories. */ +static bool opt_separate_dirs = false; + +/* Show the total for each directory (and file if --all) that is at + most MAX_DEPTH levels down from the root of the hierarchy. The root + is at level 0, so 'mu --max-depth=0' is equivalent to 'mu -s'. */ +static idx_t max_depth = IDX_MAX; + +/* Only output entries with at least this SIZE if positive, + or at most if negative. See --threshold option. */ +static intmax_t opt_threshold = 0; + +/* Human-readable options for output. */ +static int human_output_opts; + +/* If true, print most recently modified date, using the specified format. */ +static bool opt_time = false; + +/* Type of time to display. controlled by --time. */ + +enum time_type + { + time_mtime, /* default */ + time_ctime, + time_atime + }; + +static enum time_type time_type = time_mtime; + +/* User specified date / time style */ +static char const *time_style = nullptr; + +/* Format used to display date / time. Controlled by --time-style */ +static char const *time_format = nullptr; + +/* The local time zone rules, as per the TZ environment variable. */ +static timezone_t localtz; + +/* The units to use when printing sizes. */ +static uintmax_t output_block_size; + +/* File name patterns to exclude. */ +static struct exclude *exclude; + +static struct muinfo tot_mui; + +#define IS_DIR_TYPE(Type) \ + ((Type) == FTS_DP \ + || (Type) == FTS_DNR) + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ +enum +{ + EXCLUDE_OPTION = CHAR_MAX + 1, + FILES0_FROM_OPTION, + HUMAN_SI_OPTION, + FTS_DEBUG, + TIME_OPTION, + TIME_STYLE_OPTION, +}; + +static struct option const long_options[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"block-size", required_argument, nullptr, 'B'}, + {"bytes", no_argument, nullptr, 'b'}, + {"count-links", no_argument, nullptr, 'l'}, + /* {"-debug", no_argument, nullptr, FTS_DEBUG}, */ + {"dereference", no_argument, nullptr, 'L'}, + {"dereference-args", no_argument, nullptr, 'D'}, + {"exclude", required_argument, nullptr, EXCLUDE_OPTION}, + {"exclude-from", required_argument, nullptr, 'X'}, + {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION}, + {"human-readable", no_argument, nullptr, 'h'}, + {"si", no_argument, nullptr, HUMAN_SI_OPTION}, + {"max-depth", required_argument, nullptr, 'd'}, + {"null", no_argument, nullptr, '0'}, + {"no-dereference", no_argument, nullptr, 'P'}, + {"one-file-system", no_argument, nullptr, 'x'}, + {"separate-dirs", no_argument, nullptr, 'S'}, + {"summarize", no_argument, nullptr, 's'}, + {"total", no_argument, nullptr, 'c'}, + {"threshold", required_argument, nullptr, 't'}, + {"time", optional_argument, nullptr, TIME_OPTION}, + {"time-style", required_argument, nullptr, TIME_STYLE_OPTION}, + {"format", required_argument, nullptr, 'f'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static char const *const time_args[] = +{ + "atime", "access", "use", "ctime", "status", nullptr +}; +static enum time_type const time_types[] = +{ + time_atime, time_atime, time_atime, time_ctime, time_ctime +}; +ARGMATCH_VERIFY (time_args, time_types); + +/* 'full-iso' uses full ISO-style dates and times. 'long-iso' uses longer + ISO-style timestamps, though shorter than 'full-iso'. 'iso' uses shorter + ISO-style timestamps. 
*/
+enum time_style
+  {
+    full_iso_time_style,       /* --time-style=full-iso */
+    long_iso_time_style,       /* --time-style=long-iso */
+    iso_time_style             /* --time-style=iso */
+  };
+
+/* Names accepted by --time-style; kept parallel to time_style_types.  */
+static char const *const time_style_args[] =
+{
+  "full-iso", "long-iso", "iso", nullptr
+};
+static enum time_style const time_style_types[] =
+{
+  full_iso_time_style, long_iso_time_style, iso_time_style
+};
+ARGMATCH_VERIFY (time_style_args, time_style_types);
+
+/* Exit with STATUS.  If STATUS is EXIT_SUCCESS, first print the full
+   help text to standard output; otherwise emit only the short
+   "try --help" hint.  */
+void
+usage (int status)
+{
+  if (status != EXIT_SUCCESS)
+    emit_try_help ();
+  else
+    {
+      printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+ or: %s [OPTION]... --files0-from=F\n\
+"), program_name, program_name);
+      fputs (_("\
+Summarize memory usage of the set of FILEs, recursively for directories.\n\
+"), stdout);
+
+      emit_mandatory_arg_note ();
+
+      fputs (_("\
+ -0, --null end each output line with NUL, not newline\n\
+ -a, --all write counts for all files, not just directories\n\
+"), stdout);
+      fputs (_("\
+ -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\
+ '-BM' prints sizes in units of 1,048,576 bytes;\n\
+ see SIZE format below\n\
+ -b, --bytes equivalent to '--block-size=1'\n\
+ -c, --total produce a grand total\n\
+ -D, --dereference-args dereference only symlinks that are listed on the\n\
+ command line\n\
+ -d, --max-depth=N print the total for a directory (or file, with --all)\n\
+ only if it is N or fewer levels below the command\n\
+ line argument; --max-depth=0 is the same as\n\
+ --summarize\n\
+"), stdout);
+      /* This tool reports memory (page cache) usage, so the
+         --files0-from text must not speak of device usage.  */
+      fputs (_("\
+ --files0-from=F summarize memory usage of the\n\
+ NUL-terminated file names specified in file F;\n\
+ if F is -, then read names from standard input\n\
+ -f, --format=FORMAT use the specified FORMAT for output instead of the\n\
+ default; only cached bytes are printed by default\n\
+ -H equivalent to --dereference-args (-D)\n\
+ -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\
+\n\
+"), stdout);
+      fputs (_("\
+ -k like --block-size=1K\n\
+ -L, --dereference dereference all symbolic links\n\
+ -l, --count-links count sizes many times if hard linked\n\
+ -m like --block-size=1M\n\
+"), stdout);
+      fputs (_("\
+ -P, --no-dereference don't follow any symbolic links (this is the default)\n\
+ -S, --separate-dirs for directories do not include size of subdirectories\n\
+ --si like -h, but use powers of 1000 not 1024\n\
+ -s, --summarize display only a total for each argument\n\
+"), stdout);
+      fputs (_("\
+ -t, --threshold=SIZE exclude entries smaller than SIZE if positive,\n\
+ or entries greater than SIZE if negative\n\
+ --time show time of the last modification of any file in the\n\
+ directory, or any of its subdirectories\n\
+ --time=WORD show time as WORD instead of modification time:\n\
+ atime, access, use, ctime or status\n\
+ --time-style=STYLE show times using STYLE, which can be:\n\
+ full-iso, long-iso, iso, or +FORMAT;\n\
+ FORMAT is interpreted like in 'date'\n\
+"), stdout);
+      fputs (_("\
+ -X, --exclude-from=FILE exclude files that match any pattern in FILE\n\
+ --exclude=PATTERN exclude files that match PATTERN\n\
+ -x, --one-file-system skip directories on different file systems\n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+      /* Written with fputs, so the '%' sequences below are literal
+         text, not printf directives.  */
+      fputs (_("\n\
+The valid format sequences are:\n\
+\n\
+ %c memory cached in the page cache\n\
+ %d dirty memory (has been modified and not yet written back\n\
+ to persistent storage)\n\
+ %w memory currently being written back\n\
+ %e memory that was once resident in the cache but has since been forced out\n\
+ %r memory that has been forced out in the recent past. In this case, the\n\
+ 'recent past' is defined by the memory that has been evicted since\n\
+ the memory in question was forced out\n\
+"), stdout);
+      emit_blocksize_note ("MU");
+      emit_size_note ();
+      emit_ancillary_info (PROGRAM_NAME);
+    }
+  exit (status);
+}
+
+/* Try to insert the INO/DEV pair into DI_SET.
+   Return true if the pair is successfully inserted,
+   false if the pair was already there.  */
+static bool
+hash_ins (struct di_set *di_set, ino_t ino, dev_t dev)
+{
+  int rc = di_set_insert (di_set, dev, ino);
+  /* A negative result is treated as memory exhaustion.  */
+  if (rc < 0)
+    xalloc_die ();
+  return rc != 0;
+}
+
+/* FIXME: this code is nearly identical to code in date.c  */
+/* Write the timestamp WHEN to standard output, formatted per FORMAT
+   in time zone TZ.  If WHEN cannot be converted to broken-down time,
+   diagnose that and print the raw seconds count instead.  */
+
+static void
+show_date (char const *format, struct timespec when, timezone_t tz)
+{
+  struct tm broken_down;
+
+  if (! localtime_rz (tz, &when.tv_sec, &broken_down))
+    {
+      char numbuf[INT_BUFSIZE_BOUND (intmax_t)];
+      char *seconds = timetostr (when.tv_sec, numbuf);
+      error (0, 0, _("time %s is out of range"), quote (seconds));
+      fputs (seconds, stdout);
+      return;
+    }
+
+  fprintftime (stdout, format, &broken_down, tz, when.tv_nsec);
+}
+
+/* Write N_BYTES to standard output, scaled according to the current
+   human_output_opts and output_block_size.  The saturation value
+   UINTMAX_MAX is shown as "Infinity".  */
+
+static void
+print_only_size (uintmax_t n_bytes)
+{
+  char hbuf[LONGEST_HUMAN_READABLE + 1];
+  char const *text;
+
+  if (n_bytes == UINTMAX_MAX)
+    text = _("Infinity");
+  else
+    text = human_readable (n_bytes, hbuf, human_output_opts,
+                           1, output_block_size);
+
+  fputs (text, stdout);
+}
+
+/* Print the counter of *PMUI selected by the format letter M
+   ('c', 'd', 'w', 'e' or 'r'); print '?' for any other letter.  */
+static void
+mu_print_stat (const struct muinfo *pmui, char m)
+{
+  uintmax_t size;
+
+  switch (m)
+    {
+    case 'c':
+      size = pmui->cache_size;
+      break;
+    case 'd':
+      size = pmui->dirty_size;
+      break;
+    case 'w':
+      size = pmui->writeback_size;
+      break;
+    case 'e':
+      size = pmui->evicted_size;
+      break;
+    case 'r':
+      size = pmui->recently_evicted_size;
+      break;
+    default:
+      putchar ('?');
+      return;
+    }
+
+  print_only_size (size);
+}
+
+/* Print one output record for *PMUI: the sizes (laid out per FORMAT,
+   or just the cached size when FORMAT is null), then the timestamp if
+   --time is in effect, then a tab, the name STRING and the line
+   terminator.  Standard output is flushed after each record.  */
+static void
+mu_print_size (const struct muinfo *pmui, char const *string, char const *format)
+{
+  if (! format)
+    {
+      /* Only print cache size by default if no format is provided */
+      print_only_size (pmui->cache_size);
+    }
+  else
+    {
+      char const *p = format;
+      while (*p)
+        {
+          char c = *p++;
+          if (c != '%')
+            {
+              putchar (c);
+              continue;
+            }
+
+          /* C starts a directive; P now points at its letter.  */
+          char directive = *p;
+          if (directive == '\0')
+            {
+              /* A lone '%' at the end of FORMAT is printed as is.  */
+              putchar ('%');
+              break;
+            }
+          p++;
+
+          if (directive == '%')
+            putchar ('%');
+          else
+            mu_print_stat (pmui, directive);
+        }
+    }
+
+  if (opt_time)
+    {
+      putchar ('\t');
+      show_date (time_format, pmui->tmax, localtz);
+    }
+  printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
+  fflush (stdout);
+}
+
+/* Record in di_mnt the dev/ino pair of every mount point that is
+   neither remote nor a dummy entry, freeing each mount entry once it
+   has been examined.  */
+
+static void
+fill_mount_table (void)
+{
+  struct mount_entry *next;
+
+  for (struct mount_entry *me = read_file_system_list (false); me; me = next)
+    {
+      /* Fetch the successor before ME is freed below.  */
+      next = me->me_next;
+
+      if (! (me->me_remote || me->me_dummy))
+        {
+          struct stat st;
+          /* Ignore stat failure.  False positives are too common.
+             E.g., "Permission denied" on /run/user/<name>/gvfs.  */
+          if (stat (me->me_mountdir, &st) == 0)
+            hash_ins (di_mnt, st.st_ino, st.st_dev);
+        }
+
+      free_mount_entry (me);
+    }
+}
+
+/* This function checks whether any of the directories in the cycle that
+   fts detected is a mount point.
*/ + +static bool +mount_point_in_fts_cycle (FTSENT const *ent) +{ + FTSENT const *cycle_ent = ent->fts_cycle; + + if (!di_mnt) + { + /* Initialize the set of dev,inode pairs. */ + di_mnt = di_set_alloc (); + if (!di_mnt) + xalloc_die (); + + fill_mount_table (); + } + + while (ent && ent != cycle_ent) + { + if (di_set_lookup (di_mnt, ent->fts_statp->st_dev, + ent->fts_statp->st_ino) > 0) + { + return true; + } + ent = ent->fts_parent; + } + + return false; +} + +static bool +get_file_cachestat(const FTSENT* ent, const struct stat* sb, enum time_type tt, struct muinfo* mui) +{ + bool ret; + const char* filename = ent->fts_path; + int fd = -1; + + muinfo_init(mui); + + /* skip calling cachestat for symlinks */ + if (ent->fts_info == FTS_SL) { + goto out_time; + } + + fd = open(filename, O_RDONLY, 0400); + if (fd == -1) { + /* UNIX domain socket file */ + if (errno == ENXIO) { + goto out_time; + } + + /* file does not exist */ + if (access(filename, F_OK)) { + goto out_time; + } + + return false; + } + + struct cachestat cs; + struct cachestat_range cs_range = {0, sb->st_size}; + if (cachestat(fd, &cs_range, &cs, 0)) { + ret = false; + goto out; + } + + long pagesize = sysconf(_SC_PAGESIZE); + mui->cache_size = cs.nr_cache * pagesize; + mui->dirty_size = cs.nr_dirty * pagesize; + mui->writeback_size = cs.nr_writeback * pagesize; + mui->evicted_size = cs.nr_evicted * pagesize; + mui->recently_evicted_size = cs.nr_recently_evicted * pagesize; + +out_time: + mui->tmax = (tt == time_mtime ? get_stat_mtime(sb) + : tt == time_atime ? get_stat_atime(sb) + : get_stat_ctime(sb)); + + ret = true; + +out: + if (fd != -1) { + close(fd); + } + + return ret; +} + +/* This function is called once for every file system object that fts + encounters. fts does a depth-first traversal. This function knows + that and accumulates per-directory totals based on changes in + the depth of the current entry. It returns true on success. 
*/ + +static bool +process_file (FTS *fts, FTSENT *ent, char const *format) +{ + bool ok = true; + + struct muinfo mui; + struct muinfo mui_to_print; + + size_t level; + static size_t n_alloc; + /* First element of the structure contains: + The sum of the sizes of all entries in the single directory + at the corresponding level. Although this does include the sizes + corresponding to each subdirectory, it does not include the size of + any file in a subdirectory. Also corresponding last modified date. + Second element of the structure contains: + The sum of the sizes of all entries in the hierarchy at or below the + directory at the specified level. */ + + static struct mulevel *mulvl; + + char const *file = ent->fts_path; + const struct stat *sb = ent->fts_statp; + int info = ent->fts_info; + + if (info == FTS_DNR) + { + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, _("cannot read directory %s"), quoteaf (file)); + ok = false; + } + else if (info != FTS_DP) + { + bool excluded = excluded_file_name (exclude, file); + if (! excluded) + { + /* Make the stat buffer *SB valid, or fail noisily. */ + + if (info == FTS_NSOK) + { + fts_set (fts, ent, FTS_AGAIN); + MAYBE_UNUSED FTSENT const *e = fts_read (fts); + affirm (e == ent); + info = ent->fts_info; + } + + if (info == FTS_NS || info == FTS_SLNONE) + { + error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file)); + return false; + } + + /* The --one-file-system (-x) option cannot exclude anything + specified on the command-line. By definition, it can exclude + a file or directory only when its device number is different + from that of its just-processed parent directory, and mu does + not process the parent of a command-line argument. */ + if (fts->fts_options & FTS_XDEV + && FTS_ROOTLEVEL < ent->fts_level + && fts->fts_dev != sb->st_dev) + excluded = true; + } + + if (excluded + || (! opt_count_all + && (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink)) + && ! 
hash_ins (di_files, sb->st_ino, sb->st_dev))) + { + /* If ignoring a directory in preorder, skip its children. + Ignore the next fts_read output too, as it's a postorder + visit to the same directory. */ + if (info == FTS_D) + { + fts_set (fts, ent, FTS_SKIP); + MAYBE_UNUSED FTSENT const *e = fts_read (fts); + affirm (e == ent); + } + + return true; + } + + switch (info) + { + case FTS_D: + return true; + + case FTS_ERR: + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, "%s", quotef (file)); + ok = false; + break; + + case FTS_DC: + /* If not following symlinks and not a (bind) mount point. */ + if (cycle_warning_required (fts, ent) + && ! mount_point_in_fts_cycle (ent)) + { + emit_cycle_warning (file); + return false; + } + return true; + } + } + + if (!get_file_cachestat(ent, sb, time_type, &mui)) { + error (EXIT_FAILURE, errno, "getting file cache stat for %s failed", ent->fts_path); + } + + level = ent->fts_level; + mui_to_print = mui; + + if (n_alloc == 0) + { + n_alloc = level + 10; + mulvl = xcalloc (n_alloc, sizeof *mulvl); + } + else + { + if (level == prev_level) + { + /* This is usually the most common case. Do nothing. */ + } + else if (level > prev_level) + { + /* Descending the hierarchy. + Clear the accumulators for *all* levels between prev_level + and the current one. The depth may change dramatically, + e.g., from 1 to 10. */ + + if (n_alloc <= level) + { + mulvl = xnrealloc (mulvl, level, 2 * sizeof *mulvl); + n_alloc = level * 2; + } + + for (size_t i = prev_level + 1; i <= level; i++) + { + muinfo_init (&mulvl[i].ent); + muinfo_init (&mulvl[i].subdir); + } + } + else /* level < prev_level */ + { + /* Ascending the hierarchy. + Process a directory only after all entries in that + directory have been processed. When the depth decreases, + propagate sums from the children (prev_level) to the parent. + Here, the current level is always one smaller than the + previous one. 
*/ + + affirm (level == prev_level - 1); + + muinfo_add (&mui_to_print, &mulvl[prev_level].ent); + if (!opt_separate_dirs) + muinfo_add (&mui_to_print, &mulvl[prev_level].subdir); + muinfo_add (&mulvl[level].subdir, &mulvl[prev_level].ent); + muinfo_add (&mulvl[level].subdir, &mulvl[prev_level].subdir); + } + } + + prev_level = level; + + /* Let the size of a directory entry contribute to the total for the + containing directory, unless --separate-dirs (-S) is specified. */ + if (! (opt_separate_dirs && IS_DIR_TYPE (info))) + muinfo_add (&mulvl[level].ent, &mui); + + /* Even if this directory is unreadable or we can't chdir into it, + do let its size contribute to the total. */ + muinfo_add (&tot_mui, &mui); + + if ((IS_DIR_TYPE (info) && level <= max_depth) + || (opt_all && level <= max_depth) + || level == 0) + { + /* Print or elide this entry according to the --threshold option. */ + uintmax_t v = mui_to_print.cache_size; + if (opt_threshold < 0 + ? v <= -opt_threshold + : v >= opt_threshold) { + mu_print_size(&mui_to_print, file, format); + } + } + + return ok; +} + +/* Recursively print the sizes of the directories (and, if selected, files) + named in FILES, the last entry of which is null. + BIT_FLAGS controls how fts works. + Return true if successful. */ + +static bool +mu_files (char **files, int bit_flags, char const *format) +{ + bool ok = true; + + if (*files) + { + FTS *fts = xfts_open (files, bit_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + error (0, errno, _("fts_read failed: %s"), + quotef (fts->fts_path)); + ok = false; + } + + /* When exiting this loop early, be careful to reset the + global, prev_level, used in process_file. Otherwise, its + (level == prev_level - 1) assertion could fail. 
*/ + prev_level = 0; + break; + } + FTS_CROSS_CHECK (fts); + + ok &= process_file (fts, ent, format); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + } + + return ok; +} + +int +main (int argc, char **argv) +{ + char *cwd_only[2]; + bool max_depth_specified = false; + bool ok = true; + char *files_from = nullptr; + + /* Bit flags that control how fts works. */ + int bit_flags = FTS_NOSTAT; + + /* Select one of the three FTS_ options that control if/when + to follow a symlink. */ + int symlink_deref_bits = FTS_PHYSICAL; + + /* If true, display only a total for each argument. */ + bool opt_summarize_only = false; + + cwd_only[0] = bad_cast ("."); + cwd_only[1] = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + exclude = new_exclude (); + + human_options (getenv ("MU_BLOCK_SIZE"), + &human_output_opts, &output_block_size); + + muinfo_init(&tot_mui); + + char *format = nullptr; + + while (true) + { + int oi = -1; + int c = getopt_long (argc, argv, "0abd:cf:hHklmst:xB:DLPSX:", + long_options, &oi); + if (c == -1) + break; + + switch (c) + { +#if DU_DEBUG + case FTS_DEBUG: + fts_debug = true; + break; +#endif + + case '0': + opt_nul_terminate_output = true; + break; + + case 'a': + opt_all = true; + break; + + case 'b': + human_output_opts = 0; + output_block_size = 1; + break; + + case 'c': + print_grand_total = true; + break; + + case 'f': + format = optarg; + break; + + case 'h': + human_output_opts = human_autoscale | human_SI | human_base_1024; + output_block_size = 1; + break; + + case HUMAN_SI_OPTION: + human_output_opts = human_autoscale | human_SI; + output_block_size = 1; + break; + + case 'k': + human_output_opts = 0; + output_block_size = 1024; + break; + + case 'd': /* --max-depth=N */ + { + intmax_t tmp; + if (xstrtoimax (optarg, nullptr, 0, &tmp, "") == 
LONGINT_OK + && tmp <= IDX_MAX) + { + max_depth_specified = true; + max_depth = tmp; + } + else + { + error (0, 0, _("invalid maximum depth %s"), + quote (optarg)); + ok = false; + } + } + break; + + case 'm': + human_output_opts = 0; + output_block_size = 1024 * 1024; + break; + + case 'l': + opt_count_all = true; + break; + + case 's': + opt_summarize_only = true; + break; + + case 't': + { + enum strtol_error e; + e = xstrtoimax (optarg, nullptr, 0, &opt_threshold, + "kKmMGTPEZYRQ0"); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + if (opt_threshold == 0 && *optarg == '-') + { + /* Do not allow -0, as this wouldn't make sense anyway. */ + error (EXIT_FAILURE, 0, _("invalid --threshold argument '-0'")); + } + } + break; + + case 'x': + bit_flags |= FTS_XDEV; + break; + + case 'B': + { + enum strtol_error e = human_options (optarg, &human_output_opts, + &output_block_size); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + } + break; + + case 'H': /* NOTE: before 2008-12, -H was equivalent to --si. */ + case 'D': + symlink_deref_bits = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* --dereference */ + symlink_deref_bits = FTS_LOGICAL; + break; + + case 'P': /* --no-dereference */ + symlink_deref_bits = FTS_PHYSICAL; + break; + + case 'S': + opt_separate_dirs = true; + break; + + case 'X': + if (add_exclude_file (add_exclude, exclude, optarg, + EXCLUDE_WILDCARDS, '\n')) + { + error (0, errno, "%s", quotef (optarg)); + ok = false; + } + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case EXCLUDE_OPTION: + add_exclude (exclude, optarg, EXCLUDE_WILDCARDS); + break; + + case TIME_OPTION: + opt_time = true; + time_type = + (optarg + ? 
XARGMATCH ("--time", optarg, time_args, time_types) + : time_mtime); + localtz = tzalloc (getenv ("TZ")); + break; + + case TIME_STYLE_OPTION: + time_style = optarg; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + ok = false; + } + } + + if (!ok) + usage (EXIT_FAILURE); + + if (opt_all && opt_summarize_only) + { + error (0, 0, _("cannot both summarize and show all entries")); + usage (EXIT_FAILURE); + } + + if (opt_summarize_only && max_depth_specified && max_depth == 0) + { + error (0, 0, + _("warning: summarizing is the same as using --max-depth=0")); + } + + if (opt_summarize_only && max_depth_specified && max_depth != 0) + { + error (0, 0, _("warning: summarizing conflicts with --max-depth=%td"), + max_depth); + usage (EXIT_FAILURE); + } + + if (opt_summarize_only) + max_depth = 0; + + /* Process time style if printing last times. */ + if (opt_time) + { + if (! time_style) + { + time_style = getenv ("TIME_STYLE"); + + /* Ignore TIMESTYLE="locale", for compatibility with ls. */ + if (! time_style || STREQ (time_style, "locale")) + time_style = "long-iso"; + else if (*time_style == '+') + { + /* Ignore anything after a newline, for compatibility + with ls. */ + char *p = strchr (time_style, '\n'); + if (p) + *p = '\0'; + } + else + { + /* Ignore "posix-" prefix, for compatibility with ls. 
*/ + static char const posix_prefix[] = "posix-"; + static const size_t prefix_len = sizeof posix_prefix - 1; + while (STREQ_LEN (time_style, posix_prefix, prefix_len)) + time_style += prefix_len; + } + } + + if (*time_style == '+') + time_format = time_style + 1; + else + { + switch (XARGMATCH ("time style", time_style, + time_style_args, time_style_types)) + { + case full_iso_time_style: + time_format = "%Y-%m-%d %H:%M:%S.%N %z"; + break; + + case long_iso_time_style: + time_format = "%Y-%m-%d %H:%M"; + break; + + case iso_time_style: + time_format = "%Y-%m-%d"; + break; + } + } + } + + struct argv_iterator *ai; + if (files_from) + { + /* When using --files0-from=F, you may not specify any files + on the command-line. */ + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } + + if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin))) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); + + ai = argv_iter_init_stream (stdin); + + /* It's not easy here to count the arguments, so assume the + worst. */ + hash_all = true; + } + else + { + char **files = (optind < argc ? argv + optind : cwd_only); + ai = argv_iter_init_argv (files); + + /* Hash all dev,ino pairs if there are multiple arguments, or if + following non-command-line symlinks, because in either case a + file with just one hard link might be seen more than once. */ + hash_all = (optind + 1 < argc || symlink_deref_bits == FTS_LOGICAL); + } + + if (!ai) + xalloc_die (); + + /* Initialize the set of dev,inode pairs. */ + di_files = di_set_alloc (); + if (!di_files) + xalloc_die (); + + /* If not hashing everything, process_file won't find cycles on its + own, so ask fts_read to check for them accurately. */ + if (opt_count_all || ! 
hash_all) + bit_flags |= FTS_TIGHT_CYCLE_CHECK; + + bit_flags |= symlink_deref_bits; + static char *temp_argv[] = { nullptr, nullptr }; + + while (true) + { + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + case AI_ERR_OK: default: + affirm (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | du --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + /* Report and skip any empty file names before invoking fts. + This works around a glitch in fts, which fails immediately + (without looking at the other file names) when given an empty + file name. */ + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == nullptr) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. 
*/ + idx_t file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%td: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + { + temp_argv[0] = file_name; + ok &= mu_files (temp_argv, bit_flags, format); + } + } + argv_iter_done: + + argv_iter_free (ai); + di_set_free (di_files); + if (di_mnt) + di_set_free (di_mnt); + + if (files_from && (ferror (stdin) || fclose (stdin) != 0) && ok) + error (EXIT_FAILURE, 0, _("error reading %s"), quoteaf (files_from)); + + if (print_grand_total) { + mu_print_size (&tot_mui, _("total"), format); + } + + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/mu/basic.sh b/tests/mu/basic.sh new file mode 100755 index 000000000..fa5fef0cc --- /dev/null +++ b/tests/mu/basic.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p a/b || framework_failure_ +printf %4096s x > a/b/f || framework_failure_ + +mu -a a | sed 's/\s/ /' > out || fail=1 +echo === >> out +mu -a -S a | sed 's/\s/ /' >> out || fail=1 +echo === >> out +mu -s a | sed 's/\s/ /' >> out || fail=1 +echo === >> out +mu -a -b a | sed 's/\s/ /' >> out || fail=1 +echo === >> out +mu -a -h a | sed 's/\s/ /' >> out || fail=1 +echo === >> out +mu -a --block-size=512 a | sed 's/\s/ /' >> out || fail=1 +echo === >> out + + +cat <<\EOF > exp +4 a/b/f +4 a/b +4 a +=== +4 a/b/f +4 a/b +0 a +=== +4 a +=== +4096 a/b/f +4096 a/b +4096 a +=== +4.0K a/b/f +4.0K a/b +4.0K a +=== +8 a/b/f +8 a/b +8 a +=== +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/bigtime.sh b/tests/mu/bigtime.sh new file mode 100755 index 000000000..e1d8d9f51 --- /dev/null +++ b/tests/mu/bigtime.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# Exercise mu on a file with a big timestamp. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +export LC_ALL=C +export TZ=UTC0 + +# 2**63 - 1 +bignum=9223372036854775807 + +touch -d @$bignum future 2>/dev/null && +future_time=$(ls -l future) && +case "$future_time" in +*" $bignum "*) + : ;; +*' Dec 4 300627798676 '*) + skip_ "file system and localtime both handle big timestamps" ;; +*) + skip_ "file system or localtime mishandles big timestamps:" \ + "$future_time" ;; +esac || skip_ "file system cannot represent big timestamps" + +printf "0\t$bignum\tfuture\n" > exp || framework_failure_ +printf "mu: time '$bignum' is out of range\n" > err_ok || framework_failure_ + +mu --time future >out 2>err || fail=1 + +# On some systems an empty file occupies 4 blocks. +# Map the number of blocks to 0. +sed 's/^[0-9][0-9]*/0/' out > k && mv k out + +compare exp out || fail=1 +compare err err_ok || fail=1 + +Exit $fail diff --git a/tests/mu/bind-mount-dir-cycle.sh b/tests/mu/bind-mount-dir-cycle.sh new file mode 100755 index 000000000..6d22de094 --- /dev/null +++ b/tests/mu/bind-mount-dir-cycle.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# Check that mu can handle sub-bind-mounts cycles as well. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu +require_root_ + +cleanup_() { umount a/b/c; } + +mkdir -p a/b/c || framework_failure_ +mount --bind a a/b/c \ + || skip_ 'This test requires mount with a working --bind option.' + +echo a/b/c > exp || framework_failure_ +echo a/b >> exp || framework_failure_ + +mu a/b > out 2> err || fail=1 +sed 's/^[0-9][0-9]* //' out > k && mv k out + +compare /dev/null err || fail=1 +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/deref-args.sh b/tests/mu/deref-args.sh new file mode 100755 index 000000000..05f1e9ed4 --- /dev/null +++ b/tests/mu/deref-args.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# Ensure that --dereference-args (-D) gives reasonable names. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p dir/a || framework_failure_ +ln -s dir slink || framework_failure_ + +mu -D slink | sed 's/^[0-9][0-9]* //' > out +# Ensure that the trailing slash is preserved and handled properly. 
+mu -D slink/ | sed 's/^[0-9][0-9]* //' >> out + +cat <<\EOF > exp +slink/a +slink +slink/a +slink/ +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/deref.sh b/tests/mu/deref.sh new file mode 100755 index 000000000..09454cf46 --- /dev/null +++ b/tests/mu/deref.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p a/sub || framework_failure_ +ln -s a/sub slink || framework_failure_ +touch b || framework_failure_ +ln -s .. a/sub/dotdot || framework_failure_ +ln -s nowhere dangle || framework_failure_ + + +mu -sD slink b > /dev/null 2>&1 || fail=1 + +returns_ 1 mu -L dangle > /dev/null 2>&1 || fail=1 + +mu_L_output=$(mu -L a) || fail=1 +mu_lL_output=$(mu -lL a) || fail=1 +mu_x_output=$(mu --exclude=dotdot a) || fail=1 +test "X$mu_L_output" = "X$mu_x_output" || fail=1 +test "X$mu_lL_output" = "X$mu_x_output" || fail=1 + +Exit $fail diff --git a/tests/mu/exclude.sh b/tests/mu/exclude.sh new file mode 100755 index 000000000..53279f53b --- /dev/null +++ b/tests/mu/exclude.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# make sure mu's --exclude option works + +# Copyright (C) 2025 Free Software Foundation, Inc. 
+ +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p a/b/c a/x/y a/u/v || framework_failure_ + + +mu --exclude=x a | sed 's/^[0-9][0-9]* //' | sort > out || fail=1 +printf '===\n' >> out +printf 'b\n' > excl +mu --exclude-from=excl a | sed 's/^[0-9][0-9]* //' | sort >> out || fail=1 +printf '===\n' >> out +# Make sure that we can exclude an entire hierarchy. +mu --exclude=a a >> out || fail=1 +# Make sure that we can exclude based on more than one component. +# Before coreutils-5.3.0, this part would fail. +printf '===\n' >> out +mu --exclude=a/u --exclude=a/b a \ + | sed 's/^[0-9][0-9]* //' | sort >> out || fail=1 +cat <<\EOF > exp +a +a/b +a/b/c +a/u +a/u/v +=== +a +a/u +a/u/v +a/x +a/x/y +=== +=== +a +a/x +a/x/y +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/fd-leak.sh b/tests/mu/fd-leak.sh new file mode 100755 index 000000000..605e270bc --- /dev/null +++ b/tests/mu/fd-leak.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# check for file descriptor leak + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +# Create 1296 (36^2) files. +# Their names and separating spaces take up 3887 bytes. +x='a b c d e f g h i j k l m n o p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9' +f= +for i in $x; do + for j in $x; do + f="$f $i$j" + done +done + +# This may fail due to command line limitations. +touch $f || framework_failure_ + + +# With coreutils-5.0, this would fail due to a file descriptor leak. +mu $f > out || fail=1 + +Exit $fail diff --git a/tests/mu/files0-from-dir.sh b/tests/mu/files0-from-dir.sh new file mode 100755 index 000000000..850ef7b41 --- /dev/null +++ b/tests/mu/files0-from-dir.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# ensure that mu and wc handle --files0-from=DIR + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu wc + +mkdir dir + +# Skip this test if reading from a directory succeeds. 
+# In that case, using --files0-from=dir would yield garbage, +# interpreting the directory entry as a sequence of +# NUL-separated file names. +cat dir > /dev/null && skip_ "cat dir/ succeeds" + +for prog in mu wc; do + $prog --files0-from=dir > /dev/null 2>err && fail=1 + printf "$prog: dir:\n" > exp || fail=1 + # The diagnostic string is usually "Is a directory" (EISDIR), + # but accept a different string or errno value. + sed "s/dir:.*/dir:/" err > k; mv k err + compare exp err || fail=1 +done + +Exit $fail diff --git a/tests/mu/format.sh b/tests/mu/format.sh new file mode 100755 index 000000000..798bcd0e4 --- /dev/null +++ b/tests/mu/format.sh @@ -0,0 +1,64 @@ +#!/bin/sh + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +printf %8192s x > testfile || framework_failure_ +# make sure no dirty pages +sync testfile + +mu -f '%c %d %w %e %r' testfile | sed 's/\stestfile/ testfile/' > out || fail=1 +echo === >> out + +# append 4k to the file. 
That 4k will be dirty +printf %4096s x >> testfile || framework_failure_ + +mu -f '%c %d %w %e %r' testfile | sed 's/\stestfile/ testfile/' >> out || fail=1 +echo === >> out + +# flush the dirty page +sync testfile +mu -f '%c %d %w %e %r' testfile | sed 's/\stestfile/ testfile/' >> out || fail=1 +echo === >> out + +# drop the file cache +dd of=testfile oflag=nocache conv=notrunc,fdatasync count=0 +mu -f '%c %d %w %e %r' testfile | sed 's/\stestfile/ testfile/' >> out || fail=1 +echo === >> out + +# pull the pages into cache +cat testfile +mu -f '%c %d %w %e %r' testfile | sed 's/\stestfile/ testfile/' >> out || fail=1 +echo === >> out + +cat <<\EOF > exp +8 0 0 0 0 testfile +=== +12 4 0 0 0 testfile +=== +12 0 0 0 0 testfile +=== +0 0 0 0 0 testfile +=== +12 0 0 0 0 testfile +=== +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/hard-link.sh b/tests/mu/hard-link.sh new file mode 100755 index 000000000..2d8d1aa4c --- /dev/null +++ b/tests/mu/hard-link.sh @@ -0,0 +1,61 @@ +#!/bin/sh +# Ensure that hard-linked files are counted (and listed) only once. +# Likewise for excluded directories. +# Ensure that hard links _are_ listed twice when using --count-links. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p dir/sub +( cd dir && + { echo non-empty > f1 + ln f1 f2 + ln -s f1 f3 + echo non-empty > sub/F; } ) + +mu -a -L --exclude=sub --count-links dir \ + | sed 's/^[0-9][0-9]* //' | sort -r > out || fail=1 + +# For these tests, transform f1 or f2 or f3 (whichever name is found +# first) to f_. That is necessary because, depending on the type of +# file system, mu could encounter any of those linked files first, +# thus listing that one and not the others. +for args in '-L' '' +do + echo === >> out + mu -a --exclude=sub $args dir \ + | sed 's/^[0-9][0-9]* //' | sed 's/f[123]/f_/' >> out || fail=1 +done + +cat <<\EOF > exp +dir/f3 +dir/f2 +dir/f1 +dir +=== +dir/f_ +dir +=== +dir/f_ +dir/f_ +dir +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/inacc-dir.sh b/tests/mu/inacc-dir.sh new file mode 100755 index 000000000..0e1f0f591 --- /dev/null +++ b/tests/mu/inacc-dir.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# Ensure that mu fails on an inaccessible directory and does not report the same size. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu +skip_if_root_ + +mkdir -p a/sub || framework_failure_ + +mu -s a > exp || fail=1 +chmod 0 a/sub || fail=1 +# Expect failure, ignore diagnostics. 
+mu -s a > out 2> /dev/null && fail=1 + +# mu won't be able to get cache stat if no permission +compare exp out && fail=1 + +# Same as above, but don't use -s, so we print +# an entry for the unreadable "sub", too. +chmod 700 a/sub || fail=1 +mu -k a > exp || fail=1 +chmod 0 a/sub || fail=1 +# Expect failure, ignore diagnostics. +mu -k a > out 2> /dev/null && fail=1 + +# mu won't be able to get cache stat if no permission +compare exp out && fail=1 + +Exit $fail diff --git a/tests/mu/max-depth.sh b/tests/mu/max-depth.sh new file mode 100755 index 000000000..6c7b00a61 --- /dev/null +++ b/tests/mu/max-depth.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# exercise mu's --max-depth=N option + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p a/b/c/d/e || framework_failure_ +printf 'a/b/c\na/b\na\n' > exp || framework_failure_ + +mu --max-depth=2 a > out 2>err || fail=1 + +# Remove the sizes. They vary between file systems. +cut -f2- out > k && mv k out +compare exp out || fail=1 +compare /dev/null err || fail=1 + +# Repeat, but use -d 1. 
+printf 'a/b\na\n' > exp || framework_failure_ +mu -d 1 a > out 2>err || fail=1 +cut -f2- out > k && mv k out +compare exp out || fail=1 +compare /dev/null err || fail=1 + +Exit $fail diff --git a/tests/mu/no-deref.sh b/tests/mu/no-deref.sh new file mode 100755 index 000000000..ff5750e61 --- /dev/null +++ b/tests/mu/no-deref.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# Ensure that by default, mu doesn't dereference command-line symlinks. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +mkdir -p dir/a/b || framework_failure_ +ln -s dir slink || framework_failure_ + + +mu slink | sed 's/^[0-9][0-9]* //' > out +cat <<\EOF > exp +slink +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/no-x.sh b/tests/mu/no-x.sh new file mode 100755 index 000000000..9ba6d9afe --- /dev/null +++ b/tests/mu/no-x.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# Make sure mu gives the right diagnostic for a readable, +# but inaccessible directory. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu +skip_if_root_ + +mkdir -p d/no-x/y || framework_failure_ +chmod u=rw d/no-x || framework_failure_ + + +# This must exit nonzero. +mu d >/dev/null 2>out && fail=1 + +prog=mu +# NOTE: this code is the same for all tests/*/no-x tests. +# Depending on whether fts is using native fdopendir, we see one +# of the following diagnostics (note also the /y suffix in one case): +# prog: 'd/no-x': Permission denied +# prog: cannot access 'd/no-x/y': Permission denied +# prog: cannot read directory 'd/no-x': Permission denied +# Convert either of the latter two to the first one. +sed "s/^$prog: cannot access /$prog: /" out > t && mv t out +sed "s/^$prog: cannot read directory /$prog: /" out > t && mv t out +sed 's,d/no-x/y,d/no-x,' out > t && mv t out + +cat <<EOF > exp +$prog: 'd/no-x': Permission denied +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/one-file-system.sh b/tests/mu/one-file-system.sh new file mode 100755 index 000000000..89818b451 --- /dev/null +++ b/tests/mu/one-file-system.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# Test for bugs in mu's --one-file-system (-x) option. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu +cleanup_() { rm -rf "$other_partition_tmpdir"; } +. "$abs_srcdir/tests/other-fs-tmpdir" + +mkdir -p b/c y/z d "$other_partition_tmpdir/x" || framework_failure_ +ln -s "$other_partition_tmpdir/x" d || framework_failure_ + +mu -ax b y > t || fail=1 +sed 's/^[0-9][0-9]* //' t > out || framework_failure_ +cat <<\EOF > exp || framework_failure_ +b/c +b +y/z +y +EOF + +compare exp out || fail=1 + +mu -xL d > u || fail=1 +sed 's/^[0-9][0-9]* //' u > out1 || framework_failure_ +echo d > exp1 || framework_failure_ +compare exp1 out1 || fail=1 + +touch f +for opt in -x -xs; do + mu $opt f > u || fail=1 + sed 's/^[0-9][0-9]* //' u > out2 || framework_failure_ + echo f > exp2 || framework_failure_ + compare exp2 out2 || fail=1 +done + +Exit $fail diff --git a/tests/mu/slash.sh b/tests/mu/slash.sh new file mode 100755 index 000000000..0764c7e47 --- /dev/null +++ b/tests/mu/slash.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# 'mu /' would omit the '/' on the last line. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu +require_readable_root_ + + +mu --exclude='[^/]*' -x / > out-t || fail=1 +sed 's/^[0-9][0-9]* //' out-t > out +rm -f out-t +cat <<\EOF > exp +/ +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/threshold.sh b/tests/mu/threshold.sh new file mode 100755 index 000000000..02fbadc3a --- /dev/null +++ b/tests/mu/threshold.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +printf %65536s x > 64k || framework_failure_ +printf %8192s x > 8k || framework_failure_ + +mu -t 8000 * | sed 's/^[0-9][0-9]*\t//' > out || fail=1 +echo === >> out +mu -t 9000 * | sed 's/^[0-9][0-9]*\t//' >> out || fail=1 +echo === >> out +mu -t 66000 * | sed 's/^[0-9][0-9]*\t//' >> out || fail=1 +echo === >> out + +cat <<\EOF > exp +64k +8k +=== +64k +=== +=== +EOF + +compare exp out || fail=1 + +Exit $fail diff --git a/tests/mu/two-args.sh b/tests/mu/two-args.sh new file mode 100755 index 000000000..85558538f --- /dev/null +++ b/tests/mu/two-args.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# Make sure 'mu d/1 d/2' works. 
+# That command failed with mu from fileutils-4.0q. + +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ mu + +# Run this test from a sub-directory one level deeper than normal, +# so that the "mu .." below doesn't traverse sibling directories +# that may be inaccessible due to concurrently-running tests. +mkdir sub || framework_failure_ +cd sub || framework_failure_ + +t=t +mkdir -p $t/1 $t/2 || framework_failure_ + +test -d $t || fail=1 +mu $t/1 $t/2 > /dev/null || fail=1 + +# Make sure 'mu . $t' and 'mu .. $t' work. +# These would fail prior to fileutils-4.0y. +mu . $t > /dev/null || fail=1 +mu .. $t > /dev/null || fail=1 + +Exit $fail -- 2.48.1