Conform to POSIX, which says `Files with multiple links shall be counted
and written for only one entry,' in the 2008[1] and 2013[2] editions, and
uses more words to say the same thing in the 2017[3] and 2024[4] editions.
This patch also keeps inodes between operands and dedups symlinks if
applicable, which are implementation-defined in 2017 and required in 2024.
See also the `RATIONALE' section in the 2024 edition.

[1] 
https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html
[2] 
https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html
[3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html
[4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html

---
 du.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/du.c b/du.c
index 1815a02..be073a9 100644
--- a/du.c
+++ b/du.c
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <search.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -20,6 +21,13 @@ static int aflag = 0;
 static int sflag = 0;
 static int hflag = 0;
 
+static void * devtree = NULL;
+
+struct device {
+       dev_t devno;
+       void * inodetree;
+};
+
 static void
 printpath(off_t n, const char *path)
 {
@@ -35,6 +43,58 @@ nblks(blkcnt_t blocks)
        return (512 * blocks + blksize - 1) / blksize;
 }
 
+static int
+compare_dev(const void *a, const void *b)
+{
+       const struct device *A = a, *B = b;
+       if (A->devno == B->devno)
+               return 0;
+       if (A->devno > B->devno)
+               return -1;
+       else /* A->devno < B->devno */
+               return 1;
+}
+static int
+compare_ino(const void *a, const void *b)
+{
+       ino_t A = *(ino_t*)a, B = *(ino_t*)b;
+       return A == B ? 0 : A > B ? -1 : 1;
+}
+
+static int
+isduplicate(dev_t dev, ino_t ino)
+{
+       struct device dev_branch = { dev, NULL };
+       struct device ** found = tsearch(&dev_branch, &devtree, compare_dev);
+
+       if (!found)
+               eprintf("%s:", argv0);
+
+       if (*found == &dev_branch) {
+               ino_t *ino_p;
+               /* new device added */
+               *found = emalloc(sizeof **found);
+               **found = dev_branch;
+               /* init with this inode */
+               ino_p = emalloc(sizeof *ino_p);
+               *ino_p = ino;
+               if (!tsearch(ino_p, &(**found).inodetree, compare_ino))
+                       eprintf("%s:", argv0);
+       } else {
+               /* not new; see if the inode is here */
+               ino_t **found_ino = tsearch(&ino, &(**found).inodetree, 
compare_ino);
+               if (!found_ino)
+                       eprintf("%s:", argv0);
+               if (*found_ino == &ino) {
+                       *found_ino = emalloc(sizeof *found_ino);
+                       **found_ino = ino;
+               } else
+                       return 1;
+       }
+
+       return 0;
+}
+
 static void
 du(int dirfd, const char *path, struct stat *st, void *data, struct recursor 
*r)
 {
@@ -43,8 +103,13 @@ du(int dirfd, const char *path, struct stat *st, void 
*data, struct recursor *r)
        subtotal = nblks(st->st_blocks);
        if (S_ISDIR(st->st_mode))
                recurse(dirfd, path, &subtotal, r);
+       else if (r->follow != 'P' || st->st_nlink > 1)
+               if (isduplicate(st->st_dev, st->st_ino))
+                       goto print;
+
        *total += subtotal;
 
+print:
        if (!r->depth)
                printpath(*total, r->path);
        else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || 
aflag))
-- 
2.48.1


Reply via email to