Hi, Quoth Roberto E. Vargas Caballero <k...@shike2.net>: > Quoth remph <l...@disroot.org>: > > +struct device { > > + dev_t devno; > > + void * inodetree; > > +}; > > + > > I can see that you define a device struct, that contains a inode > table, but I think it is just easier to create a struct like: > > struct file { > dev_t dev; > ino_t inode; > }; > > > and adjust the compare function to use both fields. Am I missing > something?
I reworked it to use only one tree. Please, tell me what you think about this other version (some small changes were also done, like for example changing isduplicated to duplicated, because the is[a-z]* namespace is reserved to the implementation, and some bracing changes to accomodate to the style used in suckless). --- 8< --- >From e8fe04c543eab13e892beda05cb9bccb9e4f441e Mon Sep 17 00:00:00 2001 From: remph <l...@disroot.org> Date: Mon, 3 Mar 2025 19:52:37 +0100 Subject: [PATCH] du: Dedup hardlinks Conform to POSIX, which says `Files with multiple links shall be counted and written for only one entry,' in the 2008[1] and 2013[2] editions, and uses more words to say the same thing in the 2017[3] and 2024[4] editions. This patch also keeps inodes between operands and dedups symlinks if applicable, which are implementation-defined in 2017 and required in 2024. See also the `RATIONALE' section in the 2024 edition. [1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html [2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html [3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html [4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html --- du.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/du.c b/du.c index 1815a02..782b09a 100644 --- a/du.c +++ b/du.c @@ -5,6 +5,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <search.h> #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -20,6 +21,11 @@ static int aflag = 0; static int sflag = 0; static int hflag = 0; +struct file { + dev_t devno; + ino_t inode; +}; + static void printpath(off_t n, const char *path) { @@ -35,16 +41,61 @@ nblks(blkcnt_t blocks) return (512 * blocks + blksize - 1) / blksize; } +static int +cmp(const void *p1, const void *p2) +{ + const struct file *f1 = p1, *f2 = p2; + + if (f1->devno > f2->devno) + return -1; + if (f1->devno < f2->devno) + return 1; + + /* f1->devno == f2->devno */ + if (f1->inode < f2->inode) + return -1; + if (f1->inode > f2->inode) + return 1; + + return 0; +} + +static int +duplicated(dev_t dev, ino_t ino) +{ + static void *tree; + struct file **fpp, *fp, file = {dev, ino}; + + if ((fpp = tsearch(&file, &tree, cmp)) == NULL) + eprintf("%s:", argv0); + + if (*fpp != &file) + return 1; + + /* new file added */ + fp = emalloc(sizeof(*fp)); + *fp = file; + *fpp = fp; + + return 0; +} + static void du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r) { off_t *total = data, subtotal; subtotal = nblks(st->st_blocks); - if (S_ISDIR(st->st_mode)) + if (S_ISDIR(st->st_mode)) { recurse(dirfd, path, &subtotal, r); + } else if (r->follow != 'P' || st->st_nlink > 1) { + if (duplicated(st->st_dev, st->st_ino)) + goto print; + } + *total += subtotal; +print: if (!r->depth) printpath(*total, r->path); else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || aflag)) -- 2.45.3 Regards,