On 24/06/2025 01:16, Pádraig Brady wrote:
On 23/06/2025 09:24, Jaehoon Jang wrote:
=================================================================
==1151699==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x6150000004f9 at pc 0x0000004d153f bp 0x7fff937f0410 sp 0x7fff937f0408
WRITE of size 1 at 0x6150000004f9 thread T0
      #0 0x4d153e in dump_strings coreutils/src/od.c:1570:14

Nice fuzzing.

There looks to be all sorts of off by one errors in the dump_strings() function.
The issue is most easily demonstrated with:

    printf '%100s' | tr ' ' . | valgrind od -N100 -S99

The following should fix this I think.
I've only analyzed it for a few minutes, so I'll look more tomorrow.
The following should also fix the printed offset,
and also support the -N100 -S100 combination.

The previous patch didn't handle the invalid address output in all cases.
Also I didn't see a need for both read() loops in this function,
so I simplified the function in the attached more complete patch.

Marking this as done.

I'll apply this later.

thanks again,
Padraig.
From 059bc3e01854bae8a443f1acf16b83734baafc1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Tue, 24 Jun 2025 01:17:12 +0100
Subject: [PATCH] od: fix various off-by-one issues with --strings with -N

* src/od.c (dump_strings): There are three related issues here
due to not accounting for the terminating NUL char appropriately.

1. Ensure BUF always has enough space for the terminating NUL.
This avoids CWE-122: Heap-based Buffer Overflow,
where we wrote a single NUL byte directly after the allocated buffer.
I.e., there should be no buffer overflow with:
  printf '%100s' | od -N100 -S1

2. Ensure we support -S == -N (END_OFFSET - STRING_MIN == ADDRESS):
I.e., there should be output with:
  printf '%100s' | od -N10 -S10

3. Ensure we always output a valid address by ensuring
the ADDRESS and I variables are kept in sync.
I.e., this should output address 0000000 not 1777777777777777777777:
  printf '%100s' | od -N10 -S1

As well as fixing these we simplify by using a single loop
to read the data, rather than two.

* doc/coreutils.texi (od invocation): Clarify that -N
implicitly NUL terminates strings.
* tests/od/od-N.sh: Add test cases.
* NEWS: Mention the bug fixes.

Fixes https://bugs.gnu.org/78880
---
 NEWS               |  4 ++++
 doc/coreutils.texi |  2 ++
 src/od.c           | 50 +++++++++++++++++-----------------------------
 tests/od/od-N.sh   | 29 +++++++++++++++++++++++++--
 4 files changed, 51 insertions(+), 34 deletions(-)

diff --git a/NEWS b/NEWS
index 8b3d9d3cc..a05d8f1ba 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,10 @@ GNU coreutils NEWS                                    -*- outline -*-
   copying to non-NFS files from NFSv4 files with trivial ACLs.
   [bug introduced in coreutils-9.6]
 
+  od --strings with -N now works correctly.  Peviously od might
+  write a NUL byte after a heap buffer, or output invalid addresses.
+  [These bugs were present in "the beginning".]
+
   sort with key character offsets of SIZE_MAX, could induce
   a read of 1 byte before an allocated heap buffer. For example:
   'sort +0.18446744073709551615R input' on 64 bit systems.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index a1d45fb30..865ceff05 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -2075,6 +2075,8 @@ least @var{bytes} consecutive printable characters,
 followed by a zero byte (ASCII NUL).
 Prefixes and suffixes on @var{bytes} are interpreted as for the
 @option{-j} option.
+If combined with the @option{-N} option, truncated strings
+are considered ASCII NUL terminated.
 
 If @var{bytes} is omitted with @option{--strings}, the default is 3.
 
diff --git a/src/od.c b/src/od.c
index 88d467c73..1c9774142 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1505,51 +1505,33 @@ dump (void)
 }
 
 /* STRINGS mode.  Find each "string constant" in the input.
-   A string constant is a run of at least 'string_min' ASCII
-   graphic (or formatting) characters terminated by a null.
+   A string constant is a run of at least STRING_MIN
+   printable characters terminated by a NUL or END_OFFSET.
    Based on a function written by Richard Stallman for a
    traditional version of od.  Return true if successful.  */
 
 static bool
 dump_strings (void)
 {
-  idx_t bufsize = MAX (100, string_min);
+  idx_t bufsize = MAX (100, string_min + 1);
   char *buf = xmalloc (bufsize);
   uintmax_t address = n_bytes_to_skip;
   bool ok = true;
 
   while (true)
     {
-      idx_t i;
-      int c;
-
-      /* See if the next 'string_min' chars are all printing chars.  */
     tryline:
+      idx_t i = 0;
+      int c = 1;  /* Init to 1 so can distinguish if NUL read.  */
 
       if (limit_bytes_to_format
-          && (end_offset < string_min || end_offset - string_min <= address))
+          && (end_offset < string_min || end_offset - string_min < address))
         break;
 
-      for (i = 0; i < string_min; i++)
-        {
-          ok &= read_char (&c);
-          address++;
-          if (c < 0)
-            {
-              free (buf);
-              return ok;
-            }
-          if (! isprint (c))
-            /* Found a non-printing.  Try again starting with next char.  */
-            goto tryline;
-          buf[i] = c;
-        }
-
-      /* We found a run of 'string_min' printable characters.
-         Now see if it is terminated with a null byte.  */
+      /* Store consecutive printable characters to BUF.  */
       while (!limit_bytes_to_format || address < end_offset)
         {
-          if (i == bufsize)
+          if (i == bufsize - 1)
             buf = xpalloc (buf, &bufsize, 1, -1, sizeof *buf);
           ok &= read_char (&c);
           address++;
@@ -1558,17 +1540,21 @@ dump_strings (void)
               free (buf);
               return ok;
             }
+          buf[i++] = c;
           if (c == '\0')
-            break;		/* It is; print this string.  */
+            break;		/* Print this string.  */
           if (! isprint (c))
-            goto tryline;	/* It isn't; give up on this string.  */
-          buf[i++] = c;		/* String continues; store it all.  */
+            goto tryline;	/* Give up on this string.  */
         }
 
-      /* If we get here, the string is all printable and null-terminated,
-         so print it.  It is all in 'buf' and 'i' is its length.  */
+      if (i - !c < string_min)
+        goto tryline;
+
       buf[i] = 0;
-      format_address (address - i - 1, ' ');
+
+      /* If we get here, the string is all printable, so print it.  */
+
+      format_address (address - i, ' ');
 
       for (i = 0; (c = buf[i]); i++)
         {
diff --git a/tests/od/od-N.sh b/tests/od/od-N.sh
index b057c9b83..9f9fe914b 100755
--- a/tests/od/od-N.sh
+++ b/tests/od/od-N.sh
@@ -20,8 +20,6 @@
 print_ver_ od
 
 echo abcdefg > in || framework_failure_
-
-
 (od -An -N3 -c; od -An -N3 -c) < in > out
 cat <<EOF > exp || framework_failure_
    a   b   c
@@ -29,4 +27,31 @@ cat <<EOF > exp || framework_failure_
 EOF
 compare exp out || fail=1
 
+# coreutils <= 9.7 would buffer overflow with
+# a single NUL byte after the heap buffer
+printf '%100s' | od -N100 -S1 > out || fail=1
+printf '%07o %100s\n' 0 '' > exp || framework_failure_
+compare exp out || fail=1
+
+# coreutils <= 9.7 would output nothing
+printf '%100s' | od -N10 -S10 > out || fail=1
+printf '%07o %10s\n' 0 '' > exp || framework_failure_
+compare exp out || fail=1
+
+# coreutils <= 9.7 would output an invalid address
+printf '%100s' | od -N10 -S1 > out || fail=1
+printf '%07o %10s\n' 0 '' > exp || framework_failure_
+compare exp out || fail=1
+
+# Ensure -S limits appropriately
+printf '%10s\000' | od -N11 -S11 > out || fail=1
+compare /dev/null out || fail=1
+printf '%10s\000' | od -S11 > out || fail=1
+compare /dev/null out || fail=1
+printf '%10s' | od -S10 > out || fail=1 # Ignore unterminated at EOF?
+compare /dev/null out || fail=1
+printf '%10s\000%10s\000' | od -S10 > out || fail=1
+printf '%07o %10s\n' 0 '' 11 '' > exp || framework_failure_
+compare exp out || fail=1
+
 Exit $fail
-- 
2.49.0

Reply via email to