Conform to POSIX, which says `Files with multiple links shall be counted and written for only one entry,' in the 2008[1] and 2013[2] editions, and uses more words to say the same thing in the 2017[3] and 2024[4] editions. This patch also keeps inodes between operands and dedups symlinks if applicable, which are implementation-defined in 2017 and required in 2024. See also the `RATIONALE' section in the 2024 edition.
[1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html [2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html [3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html [4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html --- du.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/du.c b/du.c index 1815a02..be073a9 100644 --- a/du.c +++ b/du.c @@ -5,6 +5,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <search.h> #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -20,6 +21,13 @@ static int aflag = 0; static int sflag = 0; static int hflag = 0; +static void * devtree = NULL; + +struct device { + dev_t devno; + void * inodetree; +}; + static void printpath(off_t n, const char *path) { @@ -35,6 +43,58 @@ nblks(blkcnt_t blocks) return (512 * blocks + blksize - 1) / blksize; } +static int +compare_dev(const void *a, const void *b) +{ + const struct device *A = a, *B = b; + if (A->devno == B->devno) + return 0; + if (A->devno > B->devno) + return -1; + else /* A->devno < B->devno */ + return 1; +} +static int +compare_ino(const void *a, const void *b) +{ + ino_t A = *(ino_t*)a, B = *(ino_t*)b; + return A == B ? 0 : A > B ? -1 : 1; +} + +static int +isduplicate(dev_t dev, ino_t ino) +{ + struct device dev_branch = { dev, NULL }; + struct device ** found = tsearch(&dev_branch, &devtree, compare_dev); + + if (!found) + eprintf("%s:", argv0); + + if (*found == &dev_branch) { + ino_t *ino_p; + /* new device added */ + *found = emalloc(sizeof **found); + **found = dev_branch; + /* init with this inode */ + ino_p = emalloc(sizeof *ino_p); + *ino_p = ino; + if (!tsearch(ino_p, &(**found).inodetree, compare_ino)) + eprintf("%s:", argv0); + } else { + /* not new; see if the inode is here */ + ino_t **found_ino = tsearch(&ino, &(**found).inodetree, compare_ino); + if (!found_ino) + eprintf("%s:", argv0); + if (*found_ino == &ino) { + *found_ino = emalloc(sizeof *found_ino); + **found_ino = ino; + } else + return 1; + } + + return 0; +} + static void du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r) { @@ -43,8 +103,13 @@ du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r) subtotal = nblks(st->st_blocks); if (S_ISDIR(st->st_mode)) recurse(dirfd, path, &subtotal, r); + else if (r->follow != 'P' || st->st_nlink > 1) + if (isduplicate(st->st_dev, st->st_ino)) + goto print; + *total += subtotal; +print: if (!r->depth) printpath(*total, r->path); else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || aflag)) -- 2.48.1