On 2023-03-04 15:33, Christoph Anton Mitterer wrote:

> But would symlinks (i.e. their length) count for it?

Sure, because you can read symlinks by using readlink, and that gives you their lengths.

Come to think of it, POSIX specifies st_size only for regular files and symlinks among the files you'll find in a directory. So du --apparent should count st_size only for these file types; it should ignore st_size for other file types unless we know somehow that those sizes make sense (which for directories is problematic for the reasons you mention).


> What about hardlinked files, would they count once or n times?

That's an independent axis and is handled by -l. Hard links are not a file type.


               b      block (buffered) special
               c      character (unbuffered) special
               d      directory
               p      named pipe (FIFO)
               f      regular file
               l      symbolic link
               s      socket
               D      door (Solaris)

I expect Coreutils's already-existing usable_st_size function should tell us which types have usable st_size. This will exclude directories, which should be the right thing for your use case.


So I installed the attached patch to fix du --apparent to count sizes only when st_size is well-defined. This should address your use case so I'm boldly closing the bug report.
From 110bcd28386b1f47a4cd876098acb708fdcbbb25 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 4 Mar 2023 16:51:11 -0800
Subject: [PATCH] du: --apparent counts only symlinks and regular

Problem reported by Christoph Anton Mitterer (Bug#61884).
* src/du.c (process_file): When counting apparent sizes, count
only usable st_size members.
* tests/du/apparent.sh: New file.
* tests/local.mk (all_root_tests): Add it.
---
 NEWS                 |  5 +++++
 doc/coreutils.texi   |  3 +++
 src/du.c             |  8 ++++----
 tests/du/apparent.sh | 33 +++++++++++++++++++++++++++++++++
 tests/local.mk       |  1 +
 5 files changed, 46 insertions(+), 4 deletions(-)
 create mode 100755 tests/du/apparent.sh

diff --git a/NEWS b/NEWS
index 568994ff5..5b0dc939c 100644
--- a/NEWS
+++ b/NEWS
@@ -98,6 +98,11 @@ GNU coreutils NEWS                                    -*- outline -*-
   to support unusual devices that may have this constraint.
   [behavior inadvertently changed in coreutils-7.2]
 
+  du --apparent now counts apparent sizes only of regular files and
+  symbolic links.  POSIX does not specify the meaning of apparent
+  sizes (i.e., st_size) for other file types, and counting those sizes
+  could cause confusing and unwanted size mismatches.
+
   'ls -v' and 'sort -V' go back to sorting ".0" before ".A",
   reverting to the behavior in coreutils-9.0 and earlier.
   This behavior is now documented.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index b07a330eb..f0e46b9ee 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -12429,6 +12429,9 @@ dd bs=1 seek=2GiB if=/dev/null of=big
 has an apparent size of 2 GiB, yet on most modern
 file systems, it actually uses almost no space.
 
+Apparent sizes are meaningful only for regular files and symbolic links.
+Other file types do not contribute to apparent size.
+
 @item -B @var{size}
 @itemx --block-size=@var{size}
 @opindex -B
diff --git a/src/du.c b/src/du.c
index 8bffc1aa7..025a587d7 100644
--- a/src/du.c
+++ b/src/du.c
@@ -131,7 +131,7 @@ struct dulevel
 static bool opt_all = false;
 
 /* If true, rather than using the device usage of each file,
-   use the apparent size (a la stat.st_size).  */
+   use the apparent size (stat.st_size if usable, 0 otherwise).  */
 static bool apparent_size = false;
 
 /* If true, count each hard link of files with multiple links.  */
@@ -494,8 +494,8 @@ process_file (FTS *fts, FTSENT *ent)
   size_t level;
   static size_t n_alloc;
   /* First element of the structure contains:
-     The sum of the st_size values of all entries in the single directory
-     at the corresponding level.  Although this does include the st_size
+     The sum of the sizes of all entries in the single directory
+     at the corresponding level.  Although this does include the sizes
      corresponding to each subdirectory, it does not include the size of
      any file in a subdirectory. Also corresponding last modified date.
      Second element of the structure contains:
@@ -588,7 +588,7 @@ process_file (FTS *fts, FTSENT *ent)
 
   duinfo_set (&dui,
               (apparent_size
-               ? MAX (0, sb->st_size)
+               ? (usable_st_size (sb) ? MAX (0, sb->st_size) : 0)
                : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE),
               (time_type == time_mtime ? get_stat_mtime (sb)
                : time_type == time_atime ? get_stat_atime (sb)
diff --git a/tests/du/apparent.sh b/tests/du/apparent.sh
new file mode 100755
index 000000000..dba3031d4
--- /dev/null
+++ b/tests/du/apparent.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Exercise du's --apparent-size option.
+
+# Copyright 2023 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ du
+
+mkdir -p d || framework_failure_
+for f in $(seq 100); do
+  echo foo >d/$f || framework_failure_
+done
+
+du -b d/* >separate || fail=1
+du -b d   >together || fail=1
+separate_sum=$($AWK '{sum+=$1}END{print sum}' separate) || framework_failure_
+together_sum=$($AWK '{sum+=$1}END{print sum}' together) || framework_failure_
+test $separate_sum -eq $together_sum || fail=1
+
+Exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index c8db95e99..1fe04235d 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -118,6 +118,7 @@ all_root_tests =				\
   tests/dd/skip-seek-past-dev.sh		\
   tests/df/problematic-chars.sh			\
   tests/df/over-mount-device.sh			\
+  tests/du/apparent.sh				\
   tests/du/bind-mount-dir-cycle.sh		\
   tests/du/bind-mount-dir-cycle-v2.sh		\
   tests/id/setgid.sh				\
-- 
2.37.2

Reply via email to