Hi,

Quoth Roberto E. Vargas Caballero <k...@shike2.net>:
> Quoth remph <l...@disroot.org>:
> > +struct device {
> > +   dev_t devno;
> > +   void * inodetree;
> > +};
> > +
> 
> I can see that you define a device struct, that contains a inode
> table, but I think it is just easier to create a struct like:
> 
>       struct file {
>               dev_t dev;
>               ino_t inode;
>       };
> 
> 
> and adjust the compare function to use both fields.  Am I missing
> something?

I reworked it to use only one tree. Please, tell me what you think
about this other version (some small changes were also done, like
for example changing isduplicated to duplicated, because the is[a-z]*
namespace is reserved to the implementation, and some bracing changes
to accomodate to the style used in suckless).

--- 8< ---
>From e8fe04c543eab13e892beda05cb9bccb9e4f441e Mon Sep 17 00:00:00 2001
From: remph <l...@disroot.org>
Date: Mon, 3 Mar 2025 19:52:37 +0100
Subject: [PATCH] du: Dedup hardlinks

Conform to POSIX, which says `Files with multiple links shall be counted
and written for only one entry,' in the 2008[1] and 2013[2] editions, and
uses more words to say the same thing in the 2017[3] and 2024[4] editions.
This patch also keeps inodes between operands and dedups symlinks if
applicable, which are implementation-defined in 2017 and required in 2024.
See also the `RATIONALE' section in the 2024 edition.

[1] 
https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html
[2] 
https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html
[3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html
[4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html
---
 du.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/du.c b/du.c
index 1815a02..782b09a 100644
--- a/du.c
+++ b/du.c
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <search.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -20,6 +21,11 @@ static int aflag = 0;
 static int sflag = 0;
 static int hflag = 0;
 
+struct file {
+       dev_t devno;
+       ino_t inode;
+};
+
 static void
 printpath(off_t n, const char *path)
 {
@@ -35,16 +41,61 @@ nblks(blkcnt_t blocks)
        return (512 * blocks + blksize - 1) / blksize;
 }
 
+static int
+cmp(const void *p1, const void *p2)
+{
+       const struct file *f1 = p1, *f2 = p2;
+
+       if (f1->devno > f2->devno)
+               return -1;
+       if (f1->devno < f2->devno)
+               return 1;
+
+       /* f1->devno == f2->devno */
+       if (f1->inode < f2->inode)
+               return -1;
+       if (f1->inode > f2->inode)
+               return 1;
+
+       return 0;
+}
+
+static int
+duplicated(dev_t dev, ino_t ino)
+{
+       static void *tree;
+       struct file **fpp, *fp, file = {dev, ino};
+
+       if ((fpp = tsearch(&file, &tree, cmp)) == NULL)
+               eprintf("%s:", argv0);
+
+       if (*fpp != &file)
+               return 1;
+
+       /* new file added */
+       fp = emalloc(sizeof(*fp));
+       *fp = file;
+       *fpp = fp;
+
+       return 0;
+}
+
 static void
 du(int dirfd, const char *path, struct stat *st, void *data, struct recursor 
*r)
 {
        off_t *total = data, subtotal;
 
        subtotal = nblks(st->st_blocks);
-       if (S_ISDIR(st->st_mode))
+       if (S_ISDIR(st->st_mode)) {
                recurse(dirfd, path, &subtotal, r);
+       } else if (r->follow != 'P' || st->st_nlink > 1) {
+               if (duplicated(st->st_dev, st->st_ino))
+                       goto print;
+       }
+
        *total += subtotal;
 
+print:
        if (!r->depth)
                printpath(*total, r->path);
        else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || 
aflag))
-- 
2.45.3


Regards,


Reply via email to