On 2025-06-24 18:31, Jim Meyering wrote:
> That goes way back. I think od.c
> was the second stand-alone program I contributed to coreutils (first
> was tr). The earliest email I still have that mentions it is from
> 1997-01 prior to textutils-1.22, but that was just a ChangeLog entry
> about adapting to a changed strtod API.

I have a soft spot for 'od' as I remember using it in Unix in the 1970s. So I looked for nearby bugs and found a few, mostly integer overflows. I installed the attached patches to refactor the source and to fix the bugs I found.

You might be amused by patch 0007, which fixes a POSIX conformance bug introduced in January 1995, in what is now Git commit 851162a0da41f2b6b08a8c1ed045086db9a443a0. Evidently this POSIX-required feature is not often used! The NEWS item in the fix says "[bug introduced on 1995-01-25]" instead of the usual "[bug introduced in coreutils-N]" comment because I don't know how to relate that commit to a version number (would it be textutils? probably doesn't matter).

You might also be amused (or appalled) by patch 0007's hacky fix. I couldn't bestir myself to write a cleaner fix. The hacky fix doesn't require memory allocation so in some sense it's better than a cleaner one would be.
From 0d1c25d1cb6d0ce119775368a0fabc7644393f6e Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 08:15:42 -0700
Subject: [PATCH 01/19] od: fix theoretical size_t malloc overflow

* src/od.c (dump, dump_strings): Use idx_t allocators
rather than size_t allocators, to avoid unchecked integer
overflow on theoretical platforms where SIZE_MAX < IDX_MAX.
---
 src/od.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/od.c b/src/od.c
index 6b5c8675f..7306b0f9a 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1432,7 +1432,7 @@ dump (void)
   bool ok = true;
   size_t n_bytes_read;
 
-  block[0] = xnmalloc (2, bytes_per_block);
+  block[0] = xinmalloc (2, bytes_per_block);
   block[1] = block[0] + bytes_per_block;
 
   current_offset = n_bytes_to_skip;
@@ -1514,7 +1514,7 @@ static bool
 dump_strings (void)
 {
   idx_t bufsize = MAX (100, string_min + 1);
-  char *buf = xmalloc (bufsize);
+  char *buf = ximalloc (bufsize);
   uintmax_t address = n_bytes_to_skip;
   bool ok = true;
 
-- 
2.50.0

From 69b07cc58de0a86f7b06d6709049077c6bd486ea Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 08:07:54 -0700
Subject: [PATCH 02/19] od: fix another off-by-one issue with --strings

* src/od.c (main): Fix off-by-one error in string_min limit.
---
 src/od.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/od.c b/src/od.c
index 7306b0f9a..439f71e5b 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1738,10 +1738,10 @@ main (int argc, char **argv)
               if (s_err != LONGINT_OK)
                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
 
-              /* The minimum string length may be no larger than
+              /* The minimum string length must be less than
                  MIN (IDX_MAX, SIZE_MAX), since we may allocate a
-                 buffer of this size.  */
-              if (MIN (IDX_MAX, SIZE_MAX) < tmp)
+                 buffer of this size + 1.  */
+              if (MIN (IDX_MAX, SIZE_MAX) <= tmp)
                 error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
 
               string_min = tmp;
-- 
2.50.0

From 671d79a0b73a9a38ea0c267612ad34f284e3a8f1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 24 Jun 2025 19:13:20 -0700
Subject: [PATCH 03/19] maint: assume long long int
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It’s long been safe to assume C99+ support for long long int.
* .gitignore: Remove m4/longlong.m4.
* bootstrap.conf (buildreq): Boost git prereq from 1.4.4 to 1.5.5,
syncing with Gnulib.
(bootstrap_post_import_hook): Remove m4/longlong.m4.
* m4/jm-macros.m4 (gl_CHECK_ALL_TYPES):
No need to require AC_TYPE_UNSIGNED_LONG_LONG_INT.
* src/factor.c (DItype, UDItype):
* src/od.c (main):
Assume HAVE_LONG_LONG_INT.
* src/od.c (unsigned_long_long_int):
Remove.  All uses replaced with unsigned long long int.
---
 .gitignore      |  1 -
 bootstrap.conf  | 13 ++++++++++---
 m4/jm-macros.m4 |  1 -
 src/factor.c    |  5 -----
 src/od.c        | 18 ++++--------------
 5 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/.gitignore b/.gitignore
index 16424edf8..87eacf621 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,7 +157,6 @@
 /m4/lib-link.m4
 /m4/lib-prefix.m4
 /m4/lock.m4
-/m4/longlong.m4
 /m4/nls.m4
 /m4/po.m4
 /m4/printf-posix.m4
diff --git a/bootstrap.conf b/bootstrap.conf
index af7092c32..8488038a7 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -1,4 +1,4 @@
-# Bootstrap configuration.
+# Bootstrap configuration.                                          -*- sh -*-
 
 # Copyright (C) 2006-2025 Free Software Foundation, Inc.
 
@@ -340,7 +340,7 @@ XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
  --from-code=UTF-8\\\
 '
 
-# Append these, since we use the propername module.
+# Append these, since we use the propername-lite module.
 see_manual='"This is a proper name. See the gettext manual, section Names."'
 see_manual=\'"$see_manual"\'
 XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
@@ -359,7 +359,7 @@ automake   1.11.2
 autopoint  0.19.2
 bison      -
 gettext    0.19.2
-git        1.4.4
+git        1.5.5
 gperf      -
 gzip       -
 m4         -
@@ -385,6 +385,13 @@ bootstrap_post_import_hook ()
     && chmod a-w $tmp-1 $tmp-2 \
     && mv -f $tmp-1 $m4f && mv -f $tmp-2 $mkf)
 
+  # If "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]" appears
+  # in configure.ac, remove a file unnecessarily imported by autopoint.
+  if grep '^[	 ]*AM_GNU_GETTEXT(\[*external]*[,)]' \
+          configure.ac >/dev/null 2>&1; then
+    rm -f m4/longlong.m4
+  fi
+
   # Regenerate src/single-binary.mk
   (mkf=src/single-binary.mk tmp=single-binary.tmp \
     && rm -f $mkf $tmp \
diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4
index c7cad56ca..883b59677 100644
--- a/m4/jm-macros.m4
+++ b/m4/jm-macros.m4
@@ -204,7 +204,6 @@ AC_DEFUN([gl_CHECK_ALL_TYPES],
   AC_REQUIRE([gl_BIGENDIAN])
   AC_REQUIRE([AC_C_VOLATILE])
   AC_REQUIRE([AC_C_INLINE])
-  AC_REQUIRE([AC_TYPE_UNSIGNED_LONG_LONG_INT])
 
   AC_REQUIRE([gl_CHECK_ALL_HEADERS])
   AC_CHECK_MEMBERS(
diff --git a/src/factor.c b/src/factor.c
index 792ec96c4..948bf0527 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -171,13 +171,8 @@ typedef unsigned int UDItype    __attribute__ ((mode (DI)));
 typedef unsigned char UQItype;
 typedef          long SItype;
 typedef unsigned long int USItype;
-#  if HAVE_LONG_LONG_INT
 typedef long long int DItype;
 typedef unsigned long long int UDItype;
-#  else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
-typedef long int DItype;
-typedef unsigned long int UDItype;
-#  endif
 # endif
 # define LONGLONG_STANDALONE     /* Don't require GMP's longlong.h mdep files */
 
diff --git a/src/od.c b/src/od.c
index 439f71e5b..c1cdd310a 100644
--- a/src/od.c
+++ b/src/od.c
@@ -43,14 +43,6 @@
 /* The default number of input bytes per output line.  */
 #define DEFAULT_BYTES_PER_BLOCK 16
 
-#if HAVE_UNSIGNED_LONG_LONG_INT
-typedef unsigned long long int unsigned_long_long_int;
-#else
-/* This is just a place-holder to avoid a few '#if' directives.
-   In this case, the type isn't actually used.  */
-typedef unsigned long int unsigned_long_long_int;
-#endif
-
 #if FLOAT16_SUPPORTED
   /* Available since clang 6 (2018), and gcc 7 (2017).  */
   typedef _Float16 float16;
@@ -100,7 +92,7 @@ enum output_format
     CHARACTER
   };
 
-#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int)
+#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned long long int)
 
 /* The maximum number of bytes needed for a format string, including
    the trailing nul.  Each format string expects a variable amount of
@@ -180,7 +172,7 @@ static const int width_bytes[] =
   sizeof (short int),
   sizeof (int),
   sizeof (long int),
-  sizeof (unsigned_long_long_int),
+  sizeof (unsigned long long int),
 #if BF16_SUPPORTED
   sizeof (bfloat16),
 #else
@@ -506,7 +498,7 @@ PRINT_TYPE (print_s_short, short int)
 PRINT_TYPE (print_short, unsigned short int)
 PRINT_TYPE (print_int, unsigned int)
 PRINT_TYPE (print_long, unsigned long int)
-PRINT_TYPE (print_long_long, unsigned_long_long_int)
+PRINT_TYPE (print_long_long, unsigned long long int)
 
 PRINT_FLOATTYPE (print_bfloat, bfloat16, ftoastr, FLT_BUFSIZE_BOUND)
 PRINT_FLOATTYPE (print_halffloat, float16, ftoastr, FLT_BUFSIZE_BOUND)
@@ -1637,11 +1629,9 @@ main (int argc, char **argv)
   integral_type_size[sizeof (short int)] = SHORT;
   integral_type_size[sizeof (int)] = INT;
   integral_type_size[sizeof (long int)] = LONG;
-#if HAVE_UNSIGNED_LONG_LONG_INT
   /* If 'long int' and 'long long int' have the same size, it's fine
      to overwrite the entry for 'long' with this one.  */
-  integral_type_size[sizeof (unsigned_long_long_int)] = LONG_LONG;
-#endif
+  integral_type_size[sizeof (unsigned long long int)] = LONG_LONG;
 
   for (idx_t i = 0; i <= MAX_FP_TYPE_SIZE; i++)
     fp_type_size[i] = NO_SIZE;
-- 
2.50.0

From dbe4e2f42830fd5f90391472802309bbc9d1a8ef Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 24 Jun 2025 20:01:04 -0700
Subject: [PATCH 04/19] =?UTF-8?q?od:=20don=E2=80=99t=20assume=20no=20holes?=
 =?UTF-8?q?=20in=20wide=20unsigned?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also, fix minor related typos.
* src/od.c (MAX_INTEGRAL_TYPE_SIZE, MAX_ADDRESS_LENGTH):
Now constants, not macros.
(MAX_INTEGRAL_TYPE_WIDTH): New constant.  Use it instead of
CHAR_BIT, so as not to assume that uintmax_t and unsigned long
long int are hole-free.  This doesn’t matter on practical porting
targets, though there is still a mainframe or two that have holes.
(FMT_BYTES_ALLOCATED): Fix typo by changing "jd" to "jo".
Fix off-by-one typo in static assertion.
---
 src/od.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/od.c b/src/od.c
index c1cdd310a..8781a2258 100644
--- a/src/od.c
+++ b/src/od.c
@@ -92,7 +92,8 @@ enum output_format
     CHARACTER
   };
 
-#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned long long int)
+enum { MAX_INTEGRAL_TYPE_SIZE = sizeof (unsigned long long int) };
+enum { MAX_INTEGRAL_TYPE_WIDTH = ULLONG_WIDTH };
 
 /* The maximum number of bytes needed for a format string, including
    the trailing nul.  Each format string expects a variable amount of
@@ -104,13 +105,13 @@ enum
            (sizeof "%*.99" + 1
             + MAX (sizeof "ld",
                    MAX (sizeof "jd",
-                        MAX (sizeof "jd",
+                        MAX (sizeof "jo",
                              MAX (sizeof "ju",
                                   sizeof "jx")))))
   };
 
 /* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable.  */
-static_assert (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99);
+static_assert (MAX_INTEGRAL_TYPE_WIDTH <= 3 * 99);
 
 /* Each output format specification (from '-t spec' or from
    old-style options) is represented by one of these structures.  */
@@ -202,8 +203,7 @@ static int address_base;
 
 /* The number of octal digits required to represent the largest
    address value.  */
-#define MAX_ADDRESS_LENGTH \
-  ((sizeof (uintmax_t) * CHAR_BIT + CHAR_BIT - 1) / 3)
+enum { MAX_ADDRESS_LENGTH = UINTMAX_WIDTH / 3 + (UINTMAX_WIDTH % 3 != 0) };
 
 /* Width of a normal address.  */
 static int address_pad_len;
-- 
2.50.0

From 35d9bf6915bff5d88dd2246ae5a157db838c64f1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 24 Jun 2025 20:34:29 -0700
Subject: [PATCH 05/19] od: prefer idx_t to size_t

This helps find overflow bugs when compiling with -fsanitize=undefined.
* src/od.c (struct tspec, bytes_per_block, PRINT_FIELDS)
(PRINT_TYPE, print_named_ascii, print_ascii, decode_one_format)
(skip, write_block, read_block, dump, main):
Use idx_t, not size_t.
---
 src/od.c | 57 ++++++++++++++++++++++++--------------------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/src/od.c b/src/od.c
index 8781a2258..f385d6ce5 100644
--- a/src/od.c
+++ b/src/od.c
@@ -123,7 +123,7 @@ struct tspec
        fields to leave blank.  WIDTH is width of one field, excluding
        leading space, and PAD is total pad to divide among FIELDS.
        PAD is at least as large as FIELDS.  */
-    void (*print_function) (size_t fields, size_t blank, void const *data,
+    void (*print_function) (idx_t fields, idx_t blank, void const *data,
                             char const *fmt, int width, int pad);
     char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d".  */
     bool hexl_mode_trailer;
@@ -261,7 +261,7 @@ static idx_t n_specs_allocated;
    a multiple of the least common multiple of the sizes associated with
    the specified output types.  It should be as large as possible, but
    no larger than 16 -- unless specified with the -w option.  */
-static size_t bytes_per_block;
+static idx_t bytes_per_block;
 
 /* Human-readable representation of *file_list (for error messages).
    It differs from file_list[-1] only when file_list[-1] is "-".  */
@@ -452,25 +452,23 @@ Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
 
 #define PRINT_FIELDS(N, T, FMT_STRING_DECL, ACTION)                     \
 static void                                                             \
-N (size_t fields, size_t blank, void const *block,                      \
+N (idx_t fields, idx_t blank, void const *block,			\
    FMT_STRING_DECL, int width, int pad)                                 \
 {                                                                       \
   T const *p = block;                                                   \
-  uintmax_t i;                                                          \
   int pad_remaining = pad;                                              \
-  for (i = fields; blank < i; i--)                                      \
+  for (idx_t i = fields; blank < i; i--)				\
     {                                                                   \
       int next_pad = pad * (i - 1) / fields;                            \
       int adjusted_width = pad_remaining - next_pad + width;            \
       T x;                                                              \
       if (input_swap && sizeof (T) > 1)                                 \
         {                                                               \
-          size_t j;                                                     \
           union {                                                       \
             T x;                                                        \
             char b[sizeof (T)];                                         \
           } u;                                                          \
-          for (j = 0; j < sizeof (T); j++)                              \
+          for (idx_t j = 0; j < sizeof (T); j++)			\
             u.b[j] = ((char const *) p)[sizeof (T) - 1 - j];            \
           x = u.x;                                                      \
         }                                                               \
@@ -510,10 +508,10 @@ PRINT_FLOATTYPE (print_long_double, long double, ldtoastr, LDBL_BUFSIZE_BOUND)
 #undef PRINT_FLOATTYPE
 
 static void
-dump_hexl_mode_trailer (size_t n_bytes, char const *block)
+dump_hexl_mode_trailer (idx_t n_bytes, char const *block)
 {
   fputs ("  >", stdout);
-  for (size_t i = n_bytes; i > 0; i--)
+  for (idx_t i = n_bytes; i > 0; i--)
     {
       unsigned char c = *block++;
       unsigned char c2 = (isprint (c) ? c : '.');
@@ -523,14 +521,13 @@ dump_hexl_mode_trailer (size_t n_bytes, char const *block)
 }
 
 static void
-print_named_ascii (size_t fields, size_t blank, void const *block,
+print_named_ascii (idx_t fields, idx_t blank, void const *block,
                    MAYBE_UNUSED char const *unused_fmt_string,
                    int width, int pad)
 {
   unsigned char const *p = block;
-  uintmax_t i;
   int pad_remaining = pad;
-  for (i = fields; blank < i; i--)
+  for (idx_t i = fields; blank < i; i--)
     {
       int next_pad = pad * (i - 1) / fields;
       int masked_c = *p++ & 0x7f;
@@ -554,14 +551,13 @@ print_named_ascii (size_t fields, size_t blank, void const *block,
 }
 
 static void
-print_ascii (size_t fields, size_t blank, void const *block,
+print_ascii (idx_t fields, idx_t blank, void const *block,
              MAYBE_UNUSED char const *unused_fmt_string, int width,
              int pad)
 {
   unsigned char const *p = block;
-  uintmax_t i;
   int pad_remaining = pad;
-  for (i = fields; blank < i; i--)
+  for (idx_t i = fields; blank < i; i--)
     {
       int next_pad = pad * (i - 1) / fields;
       unsigned char c = *p++;
@@ -659,7 +655,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
   enum size_spec size_spec;
   int size;
   enum output_format fmt;
-  void (*print_function) (size_t, size_t, void const *, char const *,
+  void (*print_function) (idx_t, idx_t, void const *, char const *,
                           int, int);
   char const *p;
   char c;
@@ -868,7 +864,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
 
       {
         struct lconv const *locale = localeconv ();
-        size_t decimal_point_len =
+        idx_t decimal_point_len =
           (locale->decimal_point[0] ? strlen (locale->decimal_point) : 1);
 
         switch (+size_spec)
@@ -1111,7 +1107,7 @@ skip (uintmax_t n_skip)
           else
             {
               char buf[BUFSIZ];
-              size_t n_bytes_read, n_bytes_to_read = BUFSIZ;
+              idx_t n_bytes_read, n_bytes_to_read = BUFSIZ;
 
               while (0 < n_skip)
                 {
@@ -1228,7 +1224,7 @@ format_address_label (uintmax_t address, char c)
    That condition may be false only for the last input block.  */
 
 static void
-write_block (uintmax_t current_offset, size_t n_bytes,
+write_block (uintmax_t current_offset, idx_t n_bytes,
              char const *prev_block, char const *curr_block)
 {
   static bool first = true;
@@ -1329,7 +1325,7 @@ read_char (int *c)
    as usual and return false.  Otherwise return true.  */
 
 static bool
-read_block (size_t n, char *block, size_t *n_bytes_in_buffer)
+read_block (idx_t n, char *block, idx_t *n_bytes_in_buffer)
 {
   bool ok = true;
 
@@ -1339,11 +1335,9 @@ read_block (size_t n, char *block, size_t *n_bytes_in_buffer)
 
   while (in_stream != nullptr)	/* EOF.  */
     {
-      size_t n_needed;
-      size_t n_read;
-
-      n_needed = n - *n_bytes_in_buffer;
-      n_read = fread (block + *n_bytes_in_buffer, 1, n_needed, in_stream);
+      idx_t n_needed = n - *n_bytes_in_buffer;
+      idx_t n_read = fread (block + *n_bytes_in_buffer,
+                            1, n_needed, in_stream);
 
       *n_bytes_in_buffer += n_read;
 
@@ -1422,7 +1416,7 @@ dump (void)
   uintmax_t current_offset;
   bool idx = false;
   bool ok = true;
-  size_t n_bytes_read;
+  idx_t n_bytes_read;
 
   block[0] = xinmalloc (2, bytes_per_block);
   block[1] = block[0] + bytes_per_block;
@@ -1433,7 +1427,7 @@ dump (void)
     {
       while (ok)
         {
-          size_t n_needed;
+          idx_t n_needed;
           if (current_offset >= end_offset)
             {
               n_bytes_read = 0;
@@ -1472,14 +1466,11 @@ dump (void)
 
   if (n_bytes_read > 0)
     {
-      int l_c_m;
-      size_t bytes_to_write;
-
-      l_c_m = get_lcm ();
+      int l_c_m = get_lcm ();
 
       /* Ensure zero-byte padding up to the smallest multiple of l_c_m that
          is at least as large as n_bytes_read.  */
-      bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
+      idx_t bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
 
       memset (block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
       write_block (current_offset, n_bytes_read, block[!idx], block[idx]);
@@ -1607,7 +1598,7 @@ main (int argc, char **argv)
   bool modern = false;
   bool width_specified = false;
   bool ok = true;
-  size_t width_per_block = 0;
+  idx_t width_per_block = 0;
   static char const multipliers[] = "bEGKkMmPQRTYZ0";
 
   /* The old-style 'pseudo starting address' to be printed in parentheses
-- 
2.50.0

From 88f30ee0a5c355701914d4446dc7ec729a344fa2 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 25 Jun 2025 23:22:37 -0700
Subject: [PATCH 06/19] od: fix some unlikely integer overflows

* src/od.c (print_n_spaces, pad_at, pad_at_overflow):
New static functions.
(struct tspec, PRINT_FIELDS, print_named_ascii, print_ascii)
(decode_one_format, write_block, main):
Use idx_t, not int, for counts that depend on the number
of bytes in an object.
(decode_one_format): Use print_n_spaces to output spaces.
(PRINT_FIELDS, print_named_ascii, print_ascii):
Use pad_at to avoid integer overflow.
(write_block): Do not use %*s to pad, as the total pad might
exceed INT_MAX.  Instead, pad by hand with putchar (' ').
(main): Use pad_at_overflow to report integer overflow due to
oversize -w.  Use better way to tell whether -w is used,
without needing IF_LINT.
* tests/od/big-w.sh: New test.
* tests/local.mk (all_tests): Add it.
---
 NEWS              |  4 ++
 src/od.c          | 96 +++++++++++++++++++++++++++++++----------------
 tests/local.mk    |  1 +
 tests/od/big-w.sh | 43 +++++++++++++++++++++
 4 files changed, 112 insertions(+), 32 deletions(-)
 create mode 100755 tests/od/big-w.sh

diff --git a/NEWS b/NEWS
index 116dd993e..4a958770c 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,10 @@ GNU coreutils NEWS                                    -*- outline -*-
   Previously it would have aborted.
   [bug introduced in coreutils-9.3]
 
+  od -w no longer silently mishandles enormous widths like 3037000500.
+  Instead, it either outputs correctly or diagnoses a too-large width.
+  [This bug was present in "the beginning".]
+
   sort with key character offsets of SIZE_MAX, could induce
   a read of 1 byte before an allocated heap buffer. For example:
   'sort +0.18446744073709551615R input' on 64 bit systems.
diff --git a/src/od.c b/src/od.c
index f385d6ce5..8bb463ca8 100644
--- a/src/od.c
+++ b/src/od.c
@@ -124,11 +124,11 @@ struct tspec
        leading space, and PAD is total pad to divide among FIELDS.
        PAD is at least as large as FIELDS.  */
     void (*print_function) (idx_t fields, idx_t blank, void const *data,
-                            char const *fmt, int width, int pad);
+                            char const *fmt, int width, idx_t pad);
     char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d".  */
     bool hexl_mode_trailer;
     int field_width; /* Minimum width of a field, excluding leading space.  */
-    int pad_width; /* Total padding to be divided among fields.  */
+    idx_t pad_width; /* Total padding to be divided among fields.  */
   };
 
 /* Convert the number of 8-bit bytes of a binary representation to
@@ -450,16 +450,44 @@ Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
 
 /* Define the print functions.  */
 
+/* Print N spaces, where 0 <= N.
+   Do not rely on printf ("%*s", N, "") as N may exceed INT_MAX.  */
+static void
+print_n_spaces (intmax_t n)
+{
+  for (; 0 < n; n--)
+    putchar (' ');
+}
+
+/* If there are FIELDS fields, return the total padding up to the
+   start of field I, where I < FIELDS.  PAD is the total padding for
+   all fields.  The result equals (PAD * I) / FIELDS, except it does
+   not suffer from internal overflow.  */
+static idx_t
+pad_at (idx_t fields, idx_t i, idx_t pad)
+{
+  /* This implementation assumes that (FIELDS - 1)^2 does not overflow
+     intmax_t, an assumption checked by pad_at_overflow.  */
+  intmax_t m = pad % fields;
+  return pad / fields * i + m * i / fields;
+}
+static bool
+pad_at_overflow (idx_t fields)
+{
+  intmax_t product;
+  return ckd_mul (&product, fields - 1, fields - 1);
+}
+
 #define PRINT_FIELDS(N, T, FMT_STRING_DECL, ACTION)                     \
 static void                                                             \
 N (idx_t fields, idx_t blank, void const *block,			\
-   FMT_STRING_DECL, int width, int pad)                                 \
+   FMT_STRING_DECL, int width, idx_t pad)				\
 {                                                                       \
   T const *p = block;                                                   \
-  int pad_remaining = pad;                                              \
+  idx_t pad_remaining = pad;						\
   for (idx_t i = fields; blank < i; i--)				\
     {                                                                   \
-      int next_pad = pad * (i - 1) / fields;                            \
+      idx_t next_pad = pad_at (fields, i - 1, pad);			\
       int adjusted_width = pad_remaining - next_pad + width;            \
       T x;                                                              \
       if (input_swap && sizeof (T) > 1)                                 \
@@ -523,13 +551,12 @@ dump_hexl_mode_trailer (idx_t n_bytes, char const *block)
 static void
 print_named_ascii (idx_t fields, idx_t blank, void const *block,
                    MAYBE_UNUSED char const *unused_fmt_string,
-                   int width, int pad)
+                   int width, idx_t pad)
 {
   unsigned char const *p = block;
-  int pad_remaining = pad;
+  idx_t pad_remaining = pad;
   for (idx_t i = fields; blank < i; i--)
     {
-      int next_pad = pad * (i - 1) / fields;
       int masked_c = *p++ & 0x7f;
       char const *s;
       char buf[2];
@@ -545,7 +572,9 @@ print_named_ascii (idx_t fields, idx_t blank, void const *block,
           s = buf;
         }
 
-      xprintf ("%*s", pad_remaining - next_pad + width, s);
+      idx_t next_pad = pad_at (fields, i - 1, pad);
+      int adjusted_width = pad_remaining - next_pad + width;
+      xprintf ("%*s", adjusted_width, s);
       pad_remaining = next_pad;
     }
 }
@@ -553,13 +582,12 @@ print_named_ascii (idx_t fields, idx_t blank, void const *block,
 static void
 print_ascii (idx_t fields, idx_t blank, void const *block,
              MAYBE_UNUSED char const *unused_fmt_string, int width,
-             int pad)
+             idx_t pad)
 {
   unsigned char const *p = block;
-  int pad_remaining = pad;
+  idx_t pad_remaining = pad;
   for (idx_t i = fields; blank < i; i--)
     {
-      int next_pad = pad * (i - 1) / fields;
       unsigned char c = *p++;
       char const *s;
       char buf[4];
@@ -603,7 +631,9 @@ print_ascii (idx_t fields, idx_t blank, void const *block,
           s = buf;
         }
 
-      xprintf ("%*s", pad_remaining - next_pad + width, s);
+      idx_t next_pad = pad_at (fields, i - 1, pad);
+      int adjusted_width = pad_remaining - next_pad + width;
+      xprintf ("%*s", adjusted_width, s);
       pad_remaining = next_pad;
     }
 }
@@ -656,7 +686,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
   int size;
   enum output_format fmt;
   void (*print_function) (idx_t, idx_t, void const *, char const *,
-                          int, int);
+                          int, idx_t);
   char const *p;
   char c;
   int field_width;
@@ -1253,22 +1283,24 @@ write_block (uintmax_t current_offset, idx_t n_bytes,
       for (idx_t i = 0; i < n_specs; i++)
         {
           int datum_width = width_bytes[spec[i].size];
-          int fields_per_block = bytes_per_block / datum_width;
-          int blank_fields = (bytes_per_block - n_bytes) / datum_width;
+          idx_t fields_per_block = bytes_per_block / datum_width;
+          idx_t blank_fields = (bytes_per_block - n_bytes) / datum_width;
           if (i == 0)
             format_address (current_offset, '\0');
           else
-            printf ("%*s", address_pad_len, "");
+            print_n_spaces (address_pad_len);
           (*spec[i].print_function) (fields_per_block, blank_fields,
                                      curr_block, spec[i].fmt_string,
                                      spec[i].field_width, spec[i].pad_width);
           if (spec[i].hexl_mode_trailer)
             {
-              /* space-pad out to full line width, then dump the trailer */
+              /* Space-pad out to full line width, then dump the trailer.  */
               int field_width = spec[i].field_width;
-              int pad_width = (spec[i].pad_width * blank_fields
-                               / fields_per_block);
-              printf ("%*s", blank_fields * field_width + pad_width, "");
+              for (idx_t f = 0; f < blank_fields; f++)
+                print_n_spaces (field_width);
+              idx_t pad_width = pad_at (fields_per_block, blank_fields,
+                                        spec[i].pad_width);
+              print_n_spaces (pad_width);
               dump_hexl_mode_trailer (n_bytes, curr_block);
             }
           putchar ('\n');
@@ -1594,9 +1626,8 @@ main (int argc, char **argv)
 {
   int n_files;
   int l_c_m;
-  idx_t desired_width IF_LINT ( = 0);
+  idx_t desired_width = 0;
   bool modern = false;
-  bool width_specified = false;
   bool ok = true;
   idx_t width_per_block = 0;
   static char const multipliers[] = "bEGKkMmPQRTYZ0";
@@ -1792,7 +1823,6 @@ main (int argc, char **argv)
 
         case 'w':
           modern = true;
-          width_specified = true;
           if (optarg == nullptr)
             {
               desired_width = 32;
@@ -1958,9 +1988,9 @@ main (int argc, char **argv)
   /* Compute output block length.  */
   l_c_m = get_lcm ();
 
-  if (width_specified)
+  if (desired_width != 0)
     {
-      if (desired_width != 0 && desired_width % l_c_m == 0)
+      if (desired_width % l_c_m == 0)
         bytes_per_block = desired_width;
       else
         {
@@ -1980,23 +2010,25 @@ main (int argc, char **argv)
   /* Compute padding necessary to align output block.  */
   for (idx_t i = 0; i < n_specs; i++)
     {
-      int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
-      int block_width = (spec[i].field_width + 1) * fields_per_block;
+      idx_t fields_per_block = bytes_per_block / width_bytes[spec[i].size];
+      if (pad_at_overflow (fields_per_block))
+        error (EXIT_FAILURE, 0, _("%td is too large"), desired_width);
+      idx_t block_width = (spec[i].field_width + 1) * fields_per_block;
       if (width_per_block < block_width)
         width_per_block = block_width;
     }
   for (idx_t i = 0; i < n_specs; i++)
     {
-      int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
-      int block_width = spec[i].field_width * fields_per_block;
+      idx_t fields_per_block = bytes_per_block / width_bytes[spec[i].size];
+      idx_t block_width = spec[i].field_width * fields_per_block;
       spec[i].pad_width = width_per_block - block_width;
     }
 
 #ifdef DEBUG
-  printf ("lcm=%d, width_per_block=%zu\n", l_c_m, width_per_block);
+  printf ("lcm=%d, width_per_block=%td\n", l_c_m, width_per_block);
   for (idx_t i = 0; i < n_specs; i++)
     {
-      int fields_per_block = bytes_per_block / width_bytes[spec[i].size];
+      idx_t fields_per_block = bytes_per_block / width_bytes[spec[i].size];
       affirm (bytes_per_block % width_bytes[spec[i].size] == 0);
       affirm (1 <= spec[i].pad_width / fields_per_block);
       printf ("%d: fmt=\"%s\" in_width=%d out_width=%d pad=%d\n",
diff --git a/tests/local.mk b/tests/local.mk
index b68df41f7..03114f759 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -269,6 +269,7 @@ all_tests =					\
   tests/tail/overlay-headers.sh			\
   tests/tail/pid.sh				\
   tests/tail/pid-pipe.sh			\
+  tests/od/big-w.sh				\
   tests/od/od.pl				\
   tests/od/od-endian.sh				\
   tests/od/od-float.sh				\
diff --git a/tests/od/big-w.sh b/tests/od/big-w.sh
new file mode 100755
index 000000000..27c125c69
--- /dev/null
+++ b/tests/od/big-w.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Check whether od -wN works with big N
+
+# Copyright 2025 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ od
+very_expensive_
+
+export LC_ALL=C
+
+cat >exp <<'EOF' || framework_failure_
+0000000 x >x<
+0000001
+EOF
+
+# Try values near sqrt(2**31) and sqrt(2**63).
+for w in 46340 46341 3037000500 3037000501; do
+  printf x | od -w$w -tcz 2>err | tr -s ' ' ' ' >out
+  if test -s err; then
+    test ! -s out || fail=1
+  else
+    compare exp out || fail=1
+    outbytes=$(printf x | od -w$w -tcz | wc -c)
+    expbytes=$((4*$w + 21))
+    test $expbytes -eq $outbytes || fail=1
+  fi
+done
+
+Exit $fail
-- 
2.50.0

From 66464e61f549e9f2fd35f82567345721798288f9 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 17:29:22 -0700
Subject: [PATCH 07/19] od: fix '+N.' bug

* src/od.c (parse_old_offset): First arg is now char *,
not char const *.  If a decimal number, temporarily
modify the string so that xstrtoumax does not complain
about the '.'.
* tests/od/od.pl: Test for the bug.
---
 NEWS           |  3 +++
 src/od.c       | 31 +++++++++++++++++++++++++++----
 tests/od/od.pl |  4 ++++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 4a958770c..d0b6e794a 100644
--- a/NEWS
+++ b/NEWS
@@ -24,6 +24,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   Instead, it either outputs correctly or diagnoses a too-large width.
   [This bug was present in "the beginning".]
 
+  od +N. (where N is a decimal number) works again as per POSIX.
+  [bug introduced on 1995-01-25]
+
   sort with key character offsets of SIZE_MAX, could induce
   a read of 1 byte before an allocated heap buffer. For example:
   'sort +0.18446744073709551615R input' on 64 bit systems.
diff --git a/src/od.c b/src/od.c
index 8bb463ca8..fd5fc45bd 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1402,7 +1402,7 @@ get_lcm (void)
    leading '+' return true and set *OFFSET to the offset it denotes.  */
 
 static bool
-parse_old_offset (char const *s, uintmax_t *offset)
+parse_old_offset (char *s, uintmax_t *offset)
 {
   int radix;
 
@@ -1414,10 +1414,24 @@ parse_old_offset (char const *s, uintmax_t *offset)
     ++s;
 
   /* Determine the radix we'll use to interpret S.  If there is a '.',
+     optionally followed by 'B' or 'b' and then end of string,
      it's decimal, otherwise, if the string begins with '0X'or '0x',
      it's hexadecimal, else octal.  */
-  if (strchr (s, '.') != nullptr)
-    radix = 10;
+  char *dot = strchr (s, '.');
+  if (dot)
+    {
+      bool b = dot[1] == 'B' || dot[1] == 'b';
+      if (dot[b + 1])
+        dot = nullptr;
+    }
+
+  if (dot)
+    {
+      /* Temporarily remove the '.' from the decimal string.  */
+      dot[0] = dot[1];
+      dot[1] = '\0';
+      radix = 10;
+    }
   else
     {
       if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
@@ -1426,7 +1440,16 @@ parse_old_offset (char const *s, uintmax_t *offset)
         radix = 8;
     }
 
-  return xstrtoumax (s, nullptr, radix, offset, "Bb") == LONGINT_OK;
+  enum strtol_error s_err = xstrtoumax (s, nullptr, radix, offset, "Bb");
+
+  if (dot)
+    {
+      /* Restore the decimal string's original value.  */
+      dot[1] = dot[0];
+      dot[0] = '.';
+    }
+
+  return s_err == LONGINT_OK;
 }
 
 /* Read a chunk of size BYTES_PER_BLOCK from the input files, write the
diff --git a/tests/od/od.pl b/tests/od/od.pl
index 5bb271e60..9688607c6 100755
--- a/tests/od/od.pl
+++ b/tests/od/od.pl
@@ -60,6 +60,10 @@ my @Tests =
      ['j-proc', "-An -c -j $proc_file_byte_count $proc_file",
                                {IN=>{f2=>'e'}}, {OUT=>"   e\n"}],
 
+     # Check that the traditional form '+N.' works, as per POSIX.
+     ['trad-dot1', '+1.', {IN_PIPE=>'a'}, {OUT=>"0000001\n"}],
+     ['trad-dot512', '+1.b', {IN_PIPE => 'a' x 512}, {OUT=>"0001000\n"}],
+
      # Ensure that a large width does not cause trouble.
      # From coreutils-7.0 through coreutils-8.21, these would print
      # approximately 128KiB of padding.
-- 
2.50.0

From 4d527dda583f6d3b0bd66364549472a5d98cef67 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 26 Jun 2025 08:45:47 -0700
Subject: [PATCH 08/19] od: prefer intmax_t to uintmax_t

* src/od.c (MAX_ADDRESS_LENGTH, pseudo_offset, n_bytes_to_skip)
(max_bytes_to_format, end_offset, skip, format_address_none)
(format_address_std, format_address_paren, format_address_label)
(write_block, parse_old_offset, dump, dump_strings, main):
Prefer intmax_t to uintmax_t.  This makes no practical difference,
and lets -fsanitize=undefined check for signed integer overflow.
(skip, dump): Remove no-longer-needed casts.
(xstr2nonneg): New static function.  All callers of xstrtoumax
now call this function instead.
(main): Use ckd_add to detect signed integer overflow, since
the unsigned trick no longer works reliably.
Let xstrtol_fatal report the overflow, instead of doing
it by hand ourselves.
---
 src/od.c | 103 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 55 insertions(+), 48 deletions(-)

diff --git a/src/od.c b/src/od.c
index fd5fc45bd..b8137fa8a 100644
--- a/src/od.c
+++ b/src/od.c
@@ -203,7 +203,8 @@ static int address_base;
 
 /* The number of octal digits required to represent the largest
    address value.  */
-enum { MAX_ADDRESS_LENGTH = UINTMAX_WIDTH / 3 + (UINTMAX_WIDTH % 3 != 0) };
+enum { MAX_ADDRESS_LENGTH = ((INTMAX_WIDTH - 1) / 3
+                             + ((INTMAX_WIDTH - 1) % 3 != 0)) };
 
 /* Width of a normal address.  */
 static int address_pad_len;
@@ -224,24 +225,24 @@ static bool flag_pseudo_start;
 
 /* The difference between the old-style pseudo starting address and
    the number of bytes to skip.  */
-static uintmax_t pseudo_offset;
+static intmax_t pseudo_offset;
 
 /* Function that accepts an address and an optional following char,
    and prints the address and char to stdout.  */
-static void (*format_address) (uintmax_t, char);
+static void (*format_address) (intmax_t, char);
 
 /* The number of input bytes to skip before formatting and writing.  */
-static uintmax_t n_bytes_to_skip = 0;
+static intmax_t n_bytes_to_skip = 0;
 
 /* When false, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
    input is formatted.  */
 static bool limit_bytes_to_format = false;
 
 /* The maximum number of bytes that will be formatted.  */
-static uintmax_t max_bytes_to_format;
+static intmax_t max_bytes_to_format;
 
 /* The offset of the first byte after the last byte to be formatted.  */
-static uintmax_t end_offset;
+static intmax_t end_offset;
 
 /* When true and two or more consecutive blocks are equal, format
    only the first block and output an asterisk alone on the following
@@ -1079,7 +1080,7 @@ decode_format_string (char const *s)
    advance IN_STREAM.  */
 
 static bool
-skip (uintmax_t n_skip)
+skip (intmax_t n_skip)
 {
   bool ok = true;
   int in_errno = 0;
@@ -1114,7 +1115,7 @@ skip (uintmax_t n_skip)
              proc-like file systems.  */
           if (usable_size && STP_BLKSIZE (&file_stats) < file_stats.st_size)
             {
-              if ((uintmax_t) file_stats.st_size < n_skip)
+              if (file_stats.st_size < n_skip)
                 n_skip -= file_stats.st_size;
               else
                 {
@@ -1182,13 +1183,13 @@ skip (uintmax_t n_skip)
 }
 
 static void
-format_address_none (MAYBE_UNUSED uintmax_t address,
+format_address_none (MAYBE_UNUSED intmax_t address,
                      MAYBE_UNUSED char c)
 {
 }
 
 static void
-format_address_std (uintmax_t address, char c)
+format_address_std (intmax_t address, char c)
 {
   char buf[MAX_ADDRESS_LENGTH + 2];
   char *p = buf + sizeof buf;
@@ -1228,7 +1229,7 @@ format_address_std (uintmax_t address, char c)
 }
 
 static void
-format_address_paren (uintmax_t address, char c)
+format_address_paren (intmax_t address, char c)
 {
   putchar ('(');
   format_address_std (address, ')');
@@ -1237,7 +1238,7 @@ format_address_paren (uintmax_t address, char c)
 }
 
 static void
-format_address_label (uintmax_t address, char c)
+format_address_label (intmax_t address, char c)
 {
   format_address_std (address, ' ');
   format_address_paren (address + pseudo_offset, c);
@@ -1254,7 +1255,7 @@ format_address_label (uintmax_t address, char c)
    That condition may be false only for the last input block.  */
 
 static void
-write_block (uintmax_t current_offset, idx_t n_bytes,
+write_block (intmax_t current_offset, idx_t n_bytes,
              char const *prev_block, char const *curr_block)
 {
   static bool first = true;
@@ -1398,11 +1399,23 @@ get_lcm (void)
   return l_c_m;
 }
 
+/* Act like xstrtoimax (NPTR, nullptr, BASE, VAL, VALID_SUFFIXES),
+   except reject negative values, and *VAL may be set if
+   LONGINT_INVALID is returned.  */
+static strtol_error
+xstr2nonneg (char const *restrict nptr, int base, intmax_t *val,
+             char const *restrict valid_suffixes)
+{
+  strtol_error s_err = xstrtoimax (nptr, nullptr, base, val, valid_suffixes);
+  return s_err != LONGINT_INVALID && *val < 0 ? LONGINT_INVALID : s_err;
+}
+
 /* If S is a valid traditional offset specification with an optional
-   leading '+' return true and set *OFFSET to the offset it denotes.  */
+   leading '+' return true and set *OFFSET to the offset it denotes.
+   Otherwise return false and possibly set *OFFSET.  */
 
 static bool
-parse_old_offset (char *s, uintmax_t *offset)
+parse_old_offset (char *s, intmax_t *offset)
 {
   int radix;
 
@@ -1440,7 +1453,7 @@ parse_old_offset (char *s, uintmax_t *offset)
         radix = 8;
     }
 
-  enum strtol_error s_err = xstrtoumax (s, nullptr, radix, offset, "Bb");
+  enum strtol_error s_err = xstr2nonneg (s, radix, offset, "Bb");
 
   if (dot)
     {
@@ -1468,7 +1481,7 @@ static bool
 dump (void)
 {
   char *block[2];
-  uintmax_t current_offset;
+  intmax_t current_offset;
   bool idx = false;
   bool ok = true;
   idx_t n_bytes_read;
@@ -1482,14 +1495,12 @@ dump (void)
     {
       while (ok)
         {
-          idx_t n_needed;
           if (current_offset >= end_offset)
             {
               n_bytes_read = 0;
               break;
             }
-          n_needed = MIN (end_offset - current_offset,
-                          (uintmax_t) bytes_per_block);
+          idx_t n_needed = MIN (end_offset - current_offset, bytes_per_block);
           ok &= read_block (n_needed, block[idx], &n_bytes_read);
           if (n_bytes_read < bytes_per_block)
             break;
@@ -1553,7 +1564,7 @@ dump_strings (void)
 {
   idx_t bufsize = MAX (100, string_min + 1);
   char *buf = ximalloc (bufsize);
-  uintmax_t address = n_bytes_to_skip;
+  intmax_t address = n_bytes_to_skip;
   bool ok = true;
 
   while (true)
@@ -1657,7 +1668,7 @@ main (int argc, char **argv)
 
   /* The old-style 'pseudo starting address' to be printed in parentheses
      after any true address.  */
-  uintmax_t pseudo_start IF_LINT ( = 0);
+  intmax_t pseudo_start IF_LINT ( = 0);
 
   initialize_main (&argc, &argv);
   set_program_name (argv[0]);
@@ -1704,7 +1715,7 @@ main (int argc, char **argv)
 
   while (true)
     {
-      uintmax_t tmp;
+      intmax_t tmp;
       enum strtol_error s_err;
       int oi = -1;
       int c = getopt_long (argc, argv, short_options, long_options, &oi);
@@ -1747,8 +1758,7 @@ main (int argc, char **argv)
 
         case 'j':
           modern = true;
-          s_err = xstrtoumax (optarg, nullptr, 0,
-                              &n_bytes_to_skip, multipliers);
+          s_err = xstr2nonneg (optarg, 0, &n_bytes_to_skip, multipliers);
           if (s_err != LONGINT_OK)
             xstrtol_fatal (s_err, oi, c, long_options, optarg);
           break;
@@ -1757,8 +1767,7 @@ main (int argc, char **argv)
           modern = true;
           limit_bytes_to_format = true;
 
-          s_err = xstrtoumax (optarg, nullptr, 0, &max_bytes_to_format,
-                              multipliers);
+          s_err = xstr2nonneg (optarg, 0, &max_bytes_to_format, multipliers);
           if (s_err != LONGINT_OK)
             xstrtol_fatal (s_err, oi, c, long_options, optarg);
           break;
@@ -1769,16 +1778,14 @@ main (int argc, char **argv)
             string_min = 3;
           else
             {
-              s_err = xstrtoumax (optarg, nullptr, 0, &tmp, multipliers);
+              s_err = xstr2nonneg (optarg, 0, &tmp, multipliers);
+              /* The minimum string length + 1 must fit in idx_t,
+                 since we may allocate a buffer of this size + 1.  */
+              idx_t i;
+              if (s_err == LONGINT_OK && ckd_add (&i, tmp, 1))
+                s_err = LONGINT_OVERFLOW;
               if (s_err != LONGINT_OK)
                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
-
-              /* The minimum string length must be less than
-                 MIN (IDX_MAX, SIZE_MAX), since we may allocate a
-                 buffer of this size + 1.  */
-              if (MIN (IDX_MAX, SIZE_MAX) <= tmp)
-                error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
-
               string_min = tmp;
             }
           flag_dump_strings = true;
@@ -1853,13 +1860,16 @@ main (int argc, char **argv)
           else
             {
               intmax_t w_tmp;
-              s_err = xstrtoimax (optarg, nullptr, 10, &w_tmp, "");
-              if (s_err == LONGINT_OK && w_tmp <= 0)
-                s_err = LONGINT_INVALID;
+              s_err = xstr2nonneg (optarg, 10, &w_tmp, "");
+              if (s_err == LONGINT_OK)
+                {
+                  if (ckd_add (&desired_width, w_tmp, 0))
+                    s_err = LONGINT_OVERFLOW;
+                  else if (desired_width == 0)
+                    s_err = LONGINT_INVALID;
+                }
               if (s_err != LONGINT_OK)
                 xstrtol_fatal (s_err, oi, c, long_options, optarg);
-              if (ckd_add (&desired_width, w_tmp, 0))
-                error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg));
             }
           break;
 
@@ -1896,8 +1906,8 @@ main (int argc, char **argv)
 
   if (!modern || traditional)
     {
-      uintmax_t o1;
-      uintmax_t o2;
+      intmax_t o1;
+      intmax_t o2;
 
       switch (n_files)
         {
@@ -1970,12 +1980,9 @@ main (int argc, char **argv)
         format_address = format_address_label;
     }
 
-  if (limit_bytes_to_format)
-    {
-      end_offset = n_bytes_to_skip + max_bytes_to_format;
-      if (end_offset < n_bytes_to_skip)
-        error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large"));
-    }
+  if (limit_bytes_to_format
+      && ckd_add (&end_offset, n_bytes_to_skip, max_bytes_to_format))
+    error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large"));
 
   if (n_specs == 0)
     decode_format_string ("oS");
-- 
2.50.0

From d8aaafc09492e9edef7bcd74f8da332b54e02d74 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 26 Jun 2025 23:51:37 -0700
Subject: [PATCH 09/19] od: initialize type-size tables statically

* src/od.c (NO_SIZE): Make it explicitly 0, as the
initializers now rely on this.
(MAX_INTEGRAL_TYPE_SIZE): Remove.  All uses replaced by
ARRAY_CARDINALITY (integral_type_size) - 1.
Move static assertion down to where this can be used.
(integral_type_size, fp_type_size): Make them const,
and initialize them statically.
(main): Omit no-longer-needed initialization code.
---
 src/od.c | 89 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 40 deletions(-)

diff --git a/src/od.c b/src/od.c
index b8137fa8a..54869a32c 100644
--- a/src/od.c
+++ b/src/od.c
@@ -65,7 +65,7 @@
 
 enum size_spec
   {
-    NO_SIZE,
+    NO_SIZE = 0,
     CHAR,
     SHORT,
     INT,
@@ -92,7 +92,6 @@ enum output_format
     CHARACTER
   };
 
-enum { MAX_INTEGRAL_TYPE_SIZE = sizeof (unsigned long long int) };
 enum { MAX_INTEGRAL_TYPE_WIDTH = ULLONG_WIDTH };
 
 /* The maximum number of bytes needed for a format string, including
@@ -153,12 +152,6 @@ static char const bytes_to_unsigned_dec_digits[] =
 static char const bytes_to_hex_digits[] =
 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
 
-/* It'll be a while before we see integral types wider than 16 bytes,
-   but if/when it happens, this check will catch it.  Without this check,
-   a wider type would provoke a buffer overrun.  */
-static_assert (MAX_INTEGRAL_TYPE_SIZE
-               < ARRAY_CARDINALITY (bytes_to_hex_digits));
-
 /* Make sure the other arrays have the same length.  */
 static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_signed_dec_digits);
 static_assert (sizeof bytes_to_oct_digits
@@ -281,11 +274,53 @@ static FILE *in_stream;
 /* If true, at least one of the files we read was standard input.  */
 static bool have_read_stdin;
 
-/* Map the size in bytes to a type identifier.  */
-static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1];
+/* Map the size in bytes to a type identifier.
+   When two types have the same machine layout:
+     - Prefer unsigned int to higher ranked types, as its format is shorter.
+     - Prefer unsigned long to higher-ranked types, as it is older.
+     - Prefer uintmax_t to unsigned long long int; this wins if %lld
+       does not work but %jd does (e.g., MS-Windows).  */
+static enum size_spec const integral_type_size[] =
+  {
+#if UCHAR_MAX < USHRT_MAX
+    [sizeof (unsigned char)] = CHAR,
+#endif
+#if USHRT_MAX < UINT_MAX
+    [sizeof (unsigned short int)] = SHORT,
+#endif
+    [sizeof (unsigned int)] = INT,
+#if UINT_MAX < ULONG_MAX
+    [sizeof (unsigned long int)] = LONG,
+#endif
+#if ULONG_MAX < ULLONG_MAX
+    [sizeof (unsigned long long int)] = LONG_LONG,
+#endif
+  };
+
+/* Map the size in bytes to a floating type identifier.
+   When two types have the same machine layout:
+     - Prefer double to the other types, as its format is shorter.  */
+static enum size_spec const fp_type_size[] =
+  {
+#if FLOAT16_SUPPORTED
+    [sizeof (float16)] = FLOAT_HALF,
+#elif BF16_SUPPORTED
+    [sizeof (bfloat16)] = FLOAT_HALF,
+#endif
+#if FLT_MANT_DIG < DBL_MANT_DIG || FLT_MAX_EXP < DBL_MAX_EXP
+    [sizeof (float)] = FLOAT_SINGLE,
+#endif
+    [sizeof (double)] = FLOAT_DOUBLE,
+#if DBL_MANT_DIG < LDBL_MANT_DIG || DBL_MAX_EXP < LDBL_MAX_EXP
+    [sizeof (long double)] = FLOAT_LONG_DOUBLE,
+#endif
+  };
 
-#define MAX_FP_TYPE_SIZE sizeof (long double)
-static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1];
+/* It'll be a while before we see integral types wider than 16 bytes,
+   but if/when it happens, this check will catch it.  Without this check,
+   a wider type would provoke a buffer overrun.  */
+static_assert (ARRAY_CARDINALITY (integral_type_size)
+               <= ARRAY_CARDINALITY (bytes_to_hex_digits));
 
 #ifndef WORDS_BIGENDIAN
 # define WORDS_BIGENDIAN 0
@@ -735,7 +770,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
             size = sizeof (int);
           else
             {
-              if (MAX_INTEGRAL_TYPE_SIZE < size
+              if (ARRAY_CARDINALITY (integral_type_size) <= size
                   || integral_type_size[size] == NO_SIZE)
                 {
                   error (0, 0, _("invalid type string %s;\nthis system"
@@ -865,7 +900,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
             size = sizeof (double);
           else
             {
-              if (size > MAX_FP_TYPE_SIZE
+              if (ARRAY_CARDINALITY (fp_type_size) <= size
                   || fp_type_size[size] == NO_SIZE
                   || (! FLOAT16_SUPPORTED && BF16_SUPPORTED
                       && size == sizeof (bfloat16))
@@ -1678,32 +1713,6 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
-  for (idx_t i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++)
-    integral_type_size[i] = NO_SIZE;
-
-  integral_type_size[sizeof (char)] = CHAR;
-  integral_type_size[sizeof (short int)] = SHORT;
-  integral_type_size[sizeof (int)] = INT;
-  integral_type_size[sizeof (long int)] = LONG;
-  /* If 'long int' and 'long long int' have the same size, it's fine
-     to overwrite the entry for 'long' with this one.  */
-  integral_type_size[sizeof (unsigned long long int)] = LONG_LONG;
-
-  for (idx_t i = 0; i <= MAX_FP_TYPE_SIZE; i++)
-    fp_type_size[i] = NO_SIZE;
-
-#if FLOAT16_SUPPORTED
-  fp_type_size[sizeof (float16)] = FLOAT_HALF;
-#elif BF16_SUPPORTED
-  fp_type_size[sizeof (bfloat16)] = FLOAT_HALF;
-#endif
-  fp_type_size[sizeof (float)] = FLOAT_SINGLE;
-  /* The array entry for 'double' is filled in after that for 'long double'
-     so that if they are the same size, we avoid any overhead of
-     long double computation in libc.  */
-  fp_type_size[sizeof (long double)] = FLOAT_LONG_DOUBLE;
-  fp_type_size[sizeof (double)] = FLOAT_DOUBLE;
-
   n_specs = 0;
   n_specs_allocated = 0;
   spec = nullptr;
-- 
2.50.0

From 274226dbff6866b130bc69b84a7b7c8c00c3264d Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 27 Jun 2025 08:45:53 -0700
Subject: [PATCH 10/19] od: support uintmax_t too

This has practical effect only on hypothetical platforms where
uintmax_t is wider than unsigned long long int.
* src/od.c (enum size_spec): New constant INTMAX.
(MAX_INTEGRAL_TYPE_WIDTH): Now equals UINTMAX_WIDTH.
(FMT_BYTES_ALLOCATED): Allow for the extra "l" in "%lld".
Also, fix off-by-two error in size calculation.
(width_bytes, integral_type_size): Add entries for uintmax_t.
(print_intmax): New function.
(decode_one_format): Use it.
(ISPEC_TO_FORMAT): New arg Max_fmt. All uses changed.
---
 src/od.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/od.c b/src/od.c
index 54869a32c..27eb1ad41 100644
--- a/src/od.c
+++ b/src/od.c
@@ -71,7 +71,7 @@ enum size_spec
     INT,
     LONG,
     LONG_LONG,
-    /* FIXME: add INTMAX support, too */
+    INTMAX,
     FLOAT_HALF,
     FLOAT_SINGLE,
     FLOAT_DOUBLE,
@@ -92,7 +92,7 @@ enum output_format
     CHARACTER
   };
 
-enum { MAX_INTEGRAL_TYPE_WIDTH = ULLONG_WIDTH };
+enum { MAX_INTEGRAL_TYPE_WIDTH = UINTMAX_WIDTH };
 
 /* The maximum number of bytes needed for a format string, including
    the trailing nul.  Each format string expects a variable amount of
@@ -101,8 +101,8 @@ enum { MAX_INTEGRAL_TYPE_WIDTH = ULLONG_WIDTH };
 enum
   {
     FMT_BYTES_ALLOCATED =
-           (sizeof "%*.99" + 1
-            + MAX (sizeof "ld",
+           (sizeof "%*.99" - 1
+            + MAX (sizeof "lld",
                    MAX (sizeof "jd",
                         MAX (sizeof "jo",
                              MAX (sizeof "ju",
@@ -167,6 +167,7 @@ static const int width_bytes[] =
   sizeof (int),
   sizeof (long int),
   sizeof (unsigned long long int),
+  sizeof (uintmax_t),
 #if BF16_SUPPORTED
   sizeof (bfloat16),
 #else
@@ -292,8 +293,11 @@ static enum size_spec const integral_type_size[] =
 #if UINT_MAX < ULONG_MAX
     [sizeof (unsigned long int)] = LONG,
 #endif
-#if ULONG_MAX < ULLONG_MAX
+#if ULONG_MAX < ULLONG_MAX && ULLONG_MAX < UINTMAX_MAX
     [sizeof (unsigned long long int)] = LONG_LONG,
+#endif
+#if ULONG_MAX < UINTMAX_MAX
+    [sizeof (uintmax_t)] = INTMAX,
 #endif
   };
 
@@ -561,6 +565,7 @@ PRINT_TYPE (print_short, unsigned short int)
 PRINT_TYPE (print_int, unsigned int)
 PRINT_TYPE (print_long, unsigned long int)
 PRINT_TYPE (print_long_long, unsigned long long int)
+PRINT_TYPE (print_intmax, uintmax_t)
 
 PRINT_FLOATTYPE (print_bfloat, bfloat16, ftoastr, FLT_BUFSIZE_BOUND)
 PRINT_FLOATTYPE (print_halffloat, float16, ftoastr, FLT_BUFSIZE_BOUND)
@@ -783,10 +788,11 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
           break;
         }
 
-#define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format)	\
-  ((Spec) == LONG_LONG ? (Max_format)					\
-   : ((Spec) == LONG ? (Long_format)					\
-      : (Min_format)))							\
+#define ISPEC_TO_FORMAT(Spec, Min_fmt, Long_fmt, Long_long_fmt, Max_fmt) \
+  ((Spec) == INTMAX ? (Max_fmt)						\
+   : (Spec) == LONG_LONG ? (Long_long_fmt)				\
+   : (Spec) == LONG ? (Long_fmt)					\
+   : (Min_fmt))
 
       size_spec = integral_type_size[size];
 
@@ -796,28 +802,28 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
           fmt = SIGNED_DECIMAL;
           field_width = bytes_to_signed_dec_digits[size];
           sprintf (tspec->fmt_string, "%%*%s",
-                   ISPEC_TO_FORMAT (size_spec, "d", "ld", "jd"));
+                   ISPEC_TO_FORMAT (size_spec, "d", "ld", "lld", "jd"));
           break;
 
         case 'o':
           fmt = OCTAL;
           sprintf (tspec->fmt_string, "%%*.%d%s",
                    (field_width = bytes_to_oct_digits[size]),
-                   ISPEC_TO_FORMAT (size_spec, "o", "lo", "jo"));
+                   ISPEC_TO_FORMAT (size_spec, "o", "lo", "llo", "jo"));
           break;
 
         case 'u':
           fmt = UNSIGNED_DECIMAL;
           field_width = bytes_to_unsigned_dec_digits[size];
           sprintf (tspec->fmt_string, "%%*%s",
-                   ISPEC_TO_FORMAT (size_spec, "u", "lu", "ju"));
+                   ISPEC_TO_FORMAT (size_spec, "u", "lu", "llu", "ju"));
           break;
 
         case 'x':
           fmt = HEXADECIMAL;
           sprintf (tspec->fmt_string, "%%*.%d%s",
                    (field_width = bytes_to_hex_digits[size]),
-                   ISPEC_TO_FORMAT (size_spec, "x", "lx", "jx"));
+                   ISPEC_TO_FORMAT (size_spec, "x", "lx", "llx", "jx"));
           break;
 
         default:
@@ -850,6 +856,10 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
           print_function = print_long_long;
           break;
 
+        case INTMAX:
+          print_function = print_intmax;
+          break;
+
         default:
           affirm (false);
         }
-- 
2.50.0

From 3ad59e19bc289b41c7998f91eda424672b2c8497 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 27 Jun 2025 16:08:06 -0700
Subject: [PATCH 11/19] od: replace lookup tables with simple arithmetic

* src/od.c (FMT_BYTES_ALLOCATED): Use a simpler formula.
Although slightly too generous, the storage wasted is very small
and it pacifies gcc -Wformat-overflow=2.
(bytes_to_oct_digits, bytes_to_signed_dec_digits)
(bytes_to_unsigned_dec_digits, bytes_to_hex_digits): Remove.
All uses replaced by algorithmic calculations, which are good
enough: they are valid for integers up to 2620 bits (!) and might
be slightly conservative for wider integers.  Remove related
static_asserts, which are no longer needed.
---
 src/od.c | 53 +++++++----------------------------------------------
 1 file changed, 7 insertions(+), 46 deletions(-)

diff --git a/src/od.c b/src/od.c
index 27eb1ad41..89a1ff986 100644
--- a/src/od.c
+++ b/src/od.c
@@ -101,12 +101,7 @@ enum { MAX_INTEGRAL_TYPE_WIDTH = UINTMAX_WIDTH };
 enum
   {
     FMT_BYTES_ALLOCATED =
-           (sizeof "%*.99" - 1
-            + MAX (sizeof "lld",
-                   MAX (sizeof "jd",
-                        MAX (sizeof "jo",
-                             MAX (sizeof "ju",
-                                  sizeof "jx")))))
+      sizeof "%*.%dlld" - sizeof "%d" + INT_STRLEN_BOUND (int) + 1
   };
 
 /* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable.  */
@@ -130,34 +125,6 @@ struct tspec
     idx_t pad_width; /* Total padding to be divided among fields.  */
   };
 
-/* Convert the number of 8-bit bytes of a binary representation to
-   the number of characters (digits + sign if the type is signed)
-   required to represent the same quantity in the specified base/type.
-   For example, a 32-bit (4-byte) quantity may require a field width
-   as wide as the following for these types:
-   11	unsigned octal
-   11	signed decimal
-   10	unsigned decimal
-   8	unsigned hexadecimal  */
-
-static char const bytes_to_oct_digits[] =
-{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
-
-static char const bytes_to_signed_dec_digits[] =
-{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
-
-static char const bytes_to_unsigned_dec_digits[] =
-{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
-
-static char const bytes_to_hex_digits[] =
-{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
-
-/* Make sure the other arrays have the same length.  */
-static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_signed_dec_digits);
-static_assert (sizeof bytes_to_oct_digits
-               == sizeof bytes_to_unsigned_dec_digits);
-static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_hex_digits);
-
 /* Convert enum size_spec to the size of the named type.  */
 static const int width_bytes[] =
 {
@@ -320,12 +287,6 @@ static enum size_spec const fp_type_size[] =
 #endif
   };
 
-/* It'll be a while before we see integral types wider than 16 bytes,
-   but if/when it happens, this check will catch it.  Without this check,
-   a wider type would provoke a buffer overrun.  */
-static_assert (ARRAY_CARDINALITY (integral_type_size)
-               <= ARRAY_CARDINALITY (bytes_to_hex_digits));
-
 #ifndef WORDS_BIGENDIAN
 # define WORDS_BIGENDIAN 0
 #endif
@@ -800,29 +761,29 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
         {
         case 'd':
           fmt = SIGNED_DECIMAL;
-          field_width = bytes_to_signed_dec_digits[size];
+          field_width = INT_BITS_STRLEN_BOUND (CHAR_BIT * size - 1) + 1;
           sprintf (tspec->fmt_string, "%%*%s",
                    ISPEC_TO_FORMAT (size_spec, "d", "ld", "lld", "jd"));
           break;
 
         case 'o':
           fmt = OCTAL;
-          sprintf (tspec->fmt_string, "%%*.%d%s",
-                   (field_width = bytes_to_oct_digits[size]),
+          field_width = (CHAR_BIT * size + 2) / 3;
+          sprintf (tspec->fmt_string, "%%*.%d%s", field_width,
                    ISPEC_TO_FORMAT (size_spec, "o", "lo", "llo", "jo"));
           break;
 
         case 'u':
           fmt = UNSIGNED_DECIMAL;
-          field_width = bytes_to_unsigned_dec_digits[size];
+          field_width = INT_BITS_STRLEN_BOUND (CHAR_BIT * size);
           sprintf (tspec->fmt_string, "%%*%s",
                    ISPEC_TO_FORMAT (size_spec, "u", "lu", "llu", "ju"));
           break;
 
         case 'x':
           fmt = HEXADECIMAL;
-          sprintf (tspec->fmt_string, "%%*.%d%s",
-                   (field_width = bytes_to_hex_digits[size]),
+          field_width = (CHAR_BIT * size + 3) / 4;
+          sprintf (tspec->fmt_string, "%%*.%d%s", field_width,
                    ISPEC_TO_FORMAT (size_spec, "x", "lx", "llx", "jx"));
           break;
 
-- 
2.50.0

From 56aa549a06eeaa39295ccbf8424bcdbf9b60b99a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 27 Jun 2025 17:08:28 -0700
Subject: [PATCH 12/19] od: omit some duplicate code

On x86-64 (for example) print_long, print_long_long, and
print_intmax all behave identically, so give GCC enough info so
that it generates code for just one of these functions.
* src/od.c (enum size_spec): Arrange for enum values to
be the same if they represent types that behave the same.
(width_bytes, ISPEC_TO_FORMAT, decode_one_format):
Match the enum size_spec changes.
---
 src/od.c | 125 ++++++++++++++++++++++++-------------------------------
 1 file changed, 55 insertions(+), 70 deletions(-)

diff --git a/src/od.c b/src/od.c
index 89a1ff986..df89c69c7 100644
--- a/src/od.c
+++ b/src/od.c
@@ -67,15 +67,17 @@ enum size_spec
   {
     NO_SIZE = 0,
     CHAR,
-    SHORT,
-    INT,
-    LONG,
-    LONG_LONG,
-    INTMAX,
-    FLOAT_HALF,
-    FLOAT_SINGLE,
-    FLOAT_DOUBLE,
-    FLOAT_LONG_DOUBLE,
+    SHORT = CHAR + (UCHAR_MAX < USHRT_MAX),
+    INT = SHORT + (USHRT_MAX < UINT_MAX),
+    LONG = INT + (UINT_MAX < ULONG_MAX),
+    LONG_LONG = LONG + (ULONG_MAX < ULLONG_MAX),
+    INTMAX = LONG_LONG + (ULLONG_MAX < UINTMAX_MAX),
+    FLOAT_HALF, /* Used only if (FLOAT16_SUPPORTED || BF16_SUPPORTED).  */
+    FLOAT_SINGLE = FLOAT_HALF + (FLOAT16_SUPPORTED || BF16_SUPPORTED),
+    FLOAT_DOUBLE = FLOAT_SINGLE + (FLT_MANT_DIG < DBL_MANT_DIG
+                                   || FLT_MAX_EXP < DBL_MAX_EXP),
+    FLOAT_LONG_DOUBLE = FLOAT_DOUBLE + (DBL_MANT_DIG < LDBL_MANT_DIG
+                                        || DBL_MAX_EXP < LDBL_MAX_EXP),
     N_SIZE_SPECS
   };
 
@@ -130,14 +132,24 @@ static const int width_bytes[] =
 {
   -1,
   sizeof (char),
+#if UCHAR_MAX < USHRT_MAX
   sizeof (short int),
+#endif
+#if USHRT_MAX < UINT_MAX
   sizeof (int),
+#endif
+#if UINT_MAX < ULONG_MAX
   sizeof (long int),
+#endif
+#if ULONG_MAX < ULLONG_MAX
   sizeof (unsigned long long int),
+#endif
+#if ULLONG_MAX < UINTMAX_MAX
   sizeof (uintmax_t),
+#endif
 #if BF16_SUPPORTED
   sizeof (bfloat16),
-#else
+#elif FLOAT16_SUPPORTED
   sizeof (float16),
 #endif
   sizeof (float),
@@ -244,7 +256,7 @@ static bool have_read_stdin;
 
 /* Map the size in bytes to a type identifier.
    When two types have the same machine layout:
-     - Prefer unsigned int to higher ranked types, as its format is shorter.
+     - Prefer unsigned int to other types, as its format is shorter.
      - Prefer unsigned long to higher-ranked types, as it is older.
      - Prefer uintmax_t to unsigned long long int; this wins if %lld
        does not work but %jd does (e.g., MS-Windows).  */
@@ -750,9 +762,9 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
         }
 
 #define ISPEC_TO_FORMAT(Spec, Min_fmt, Long_fmt, Long_long_fmt, Max_fmt) \
-  ((Spec) == INTMAX ? (Max_fmt)						\
-   : (Spec) == LONG_LONG ? (Long_long_fmt)				\
-   : (Spec) == LONG ? (Long_fmt)					\
+  (LONG < INTMAX && (Spec) == INTMAX ? (Max_fmt)			\
+   : LONG < LONG_LONG && (Spec) == LONG_LONG ? (Long_long_fmt)		\
+   : INT < LONG && (Spec) == LONG ? (Long_fmt)				\
    : (Min_fmt))
 
       size_spec = integral_type_size[size];
@@ -791,39 +803,18 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
           unreachable ();
         }
 
-      switch (+size_spec)
-        {
-        case CHAR:
-          print_function = (fmt == SIGNED_DECIMAL
-                            ? print_s_char
-                            : print_char);
-          break;
-
-        case SHORT:
-          print_function = (fmt == SIGNED_DECIMAL
-                            ? print_s_short
-                            : print_short);
-          break;
-
-        case INT:
-          print_function = print_int;
-          break;
-
-        case LONG:
-          print_function = print_long;
-          break;
-
-        case LONG_LONG:
-          print_function = print_long_long;
-          break;
-
-        case INTMAX:
-          print_function = print_intmax;
-          break;
-
-        default:
-          affirm (false);
-        }
+      /* Prefer INT, prefer LONG to longer types,
+         and prefer INTMAX to LONG_LONG.  */
+      print_function
+        = (size_spec == INT ? print_int
+           : size_spec == SHORT ? (fmt == SIGNED_DECIMAL
+                                   ? print_s_short : print_short)
+           : size_spec == CHAR ? (fmt == SIGNED_DECIMAL
+                                  ? print_s_char : print_char)
+           : size_spec == LONG ? print_long
+           : size_spec == INTMAX ? print_intmax
+           : size_spec == LONG_LONG ? print_long_long
+           : (affirm (false), nullptr));
       break;
 
     case 'f':
@@ -872,10 +863,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
           else
             {
               if (ARRAY_CARDINALITY (fp_type_size) <= size
-                  || fp_type_size[size] == NO_SIZE
-                  || (! FLOAT16_SUPPORTED && BF16_SUPPORTED
-                      && size == sizeof (bfloat16))
-                  )
+                  || fp_type_size[size] == NO_SIZE)
                 {
                   error (0, 0,
                          _("invalid type string %s;\n"
@@ -904,32 +892,29 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
         idx_t decimal_point_len =
           (locale->decimal_point[0] ? strlen (locale->decimal_point) : 1);
 
-        switch (+size_spec)
+        if (size_spec == FLOAT_DOUBLE)
+          {
+            print_function = print_double;
+            field_width = DBL_STRLEN_BOUND_L (decimal_point_len);
+          }
+        else if (size_spec == FLOAT_SINGLE)
+          {
+            print_function = print_float;
+            field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
+          }
+        else if (size_spec == FLOAT_HALF)
           {
-          case FLOAT_HALF:
             print_function = fmt == BFLOATING_POINT
                              ? print_bfloat : print_halffloat;
             field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
-            break;
-
-          case FLOAT_SINGLE:
-            print_function = print_float;
-            field_width = FLT_STRLEN_BOUND_L (decimal_point_len);
-            break;
-
-          case FLOAT_DOUBLE:
-            print_function = print_double;
-            field_width = DBL_STRLEN_BOUND_L (decimal_point_len);
-            break;
-
-          case FLOAT_LONG_DOUBLE:
+          }
+        else if (size_spec == FLOAT_LONG_DOUBLE)
+          {
             print_function = print_long_double;
             field_width = LDBL_STRLEN_BOUND_L (decimal_point_len);
-            break;
-
-          default:
-            affirm (false);
           }
+        else
+          affirm (false);
 
         break;
       }
-- 
2.50.0

From 0e104647a7bf727f273228c2a3292699c8f17f67 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 27 Jun 2025 17:52:15 -0700
Subject: [PATCH 13/19] od: minor lcm tuning

* src/od.c (dump, main): Redo lcm calculations to avoid a multiply.
---
 src/od.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/od.c b/src/od.c
index df89c69c7..fd2e2138e 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1527,7 +1527,8 @@ dump (void)
 
       /* Ensure zero-byte padding up to the smallest multiple of l_c_m that
          is at least as large as n_bytes_read.  */
-      idx_t bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m);
+      idx_t bytes_to_write = (n_bytes_read + l_c_m - 1
+                              - (n_bytes_read + l_c_m - 1) % l_c_m);
 
       memset (block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read);
       write_block (current_offset, n_bytes_read, block[!idx], block[idx]);
@@ -1997,7 +1998,8 @@ main (int argc, char **argv)
   else
     {
       if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
-        bytes_per_block = l_c_m * (DEFAULT_BYTES_PER_BLOCK / l_c_m);
+        bytes_per_block = (DEFAULT_BYTES_PER_BLOCK
+                           - DEFAULT_BYTES_PER_BLOCK % l_c_m);
       else
         bytes_per_block = l_c_m;
     }
-- 
2.50.0

From 34b5043c6e78fba2167cc18b2ec2a14158fd809a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 09:49:42 -0700
Subject: [PATCH 14/19] od: simpler static initialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/od.c (address_base, address_pad_len, format_address):
Initialize statically rather than dynamically.
(limit_bytes_to_format): Remove.  All uses replaced by
checking sign of end_offset.
(max_bytes_to_format): Remove static var.  Now local to ‘main’.
(end_offset): -1 now means no limit.  All uses changed.
---
 src/od.c | 48 ++++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/src/od.c b/src/od.c
index fd2e2138e..563baf0b1 100644
--- a/src/od.c
+++ b/src/od.c
@@ -172,7 +172,7 @@ static char const charname[33][4] =
 };
 
 /* Address base (8, 10 or 16).  */
-static int address_base;
+static int address_base = 8;
 
 /* The number of octal digits required to represent the largest
    address value.  */
@@ -180,7 +180,7 @@ enum { MAX_ADDRESS_LENGTH = ((INTMAX_WIDTH - 1) / 3
                              + ((INTMAX_WIDTH - 1) % 3 != 0)) };
 
 /* Width of a normal address.  */
-static int address_pad_len;
+static int address_pad_len = 7;
 
 /* Minimum length when detecting --strings.  */
 static idx_t string_min;
@@ -202,20 +202,15 @@ static intmax_t pseudo_offset;
 
 /* Function that accepts an address and an optional following char,
    and prints the address and char to stdout.  */
-static void (*format_address) (intmax_t, char);
+static void format_address_std (intmax_t, char);
+static void (*format_address) (intmax_t, char) = format_address_std;
 
 /* The number of input bytes to skip before formatting and writing.  */
-static intmax_t n_bytes_to_skip = 0;
+static intmax_t n_bytes_to_skip;
 
-/* When false, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all
-   input is formatted.  */
-static bool limit_bytes_to_format = false;
-
-/* The maximum number of bytes that will be formatted.  */
-static intmax_t max_bytes_to_format;
-
-/* The offset of the first byte after the last byte to be formatted.  */
-static intmax_t end_offset;
+/* The offset of the first byte after the last byte to be formatted.
+   If negative, there is no limit.  */
+static intmax_t end_offset = -1;
 
 /* When true and two or more consecutive blocks are equal, format
    only the first block and output an asterisk alone on the following
@@ -992,7 +987,7 @@ open_next_file (void)
     }
   while (in_stream == nullptr);
 
-  if (limit_bytes_to_format && !flag_dump_strings)
+  if (0 <= end_offset && !flag_dump_strings)
     setvbuf (in_stream, nullptr, _IONBF, 0);
 
   return ok;
@@ -1482,7 +1477,7 @@ dump (void)
 
   current_offset = n_bytes_to_skip;
 
-  if (limit_bytes_to_format)
+  if (0 <= end_offset)
     {
       while (ok)
         {
@@ -1537,7 +1532,7 @@ dump (void)
 
   format_address (current_offset, '\n');
 
-  if (limit_bytes_to_format && current_offset >= end_offset)
+  if (0 <= end_offset && end_offset <= current_offset)
     ok &= check_and_close (0);
 
   free (block[0]);
@@ -1564,12 +1559,12 @@ dump_strings (void)
       idx_t i = 0;
       int c = 1;  /* Init to 1 so can distinguish if NUL read.  */
 
-      if (limit_bytes_to_format
+      if (0 <= end_offset
           && (end_offset < string_min || end_offset - string_min < address))
         break;
 
       /* Store consecutive printable characters to BUF.  */
-      while (!limit_bytes_to_format || address < end_offset)
+      while (! (0 <= end_offset && end_offset <= address))
         {
           if (i == bufsize - 1)
             buf = xpalloc (buf, &bufsize, 1, -1, sizeof *buf);
@@ -1658,6 +1653,10 @@ main (int argc, char **argv)
   idx_t width_per_block = 0;
   static char const multipliers[] = "bEGKkMmPQRTYZ0";
 
+  /* The maximum number of bytes that will be formatted.
+     If negative, there is no limit.  */
+  intmax_t max_bytes_to_format = -1;
+
   /* The old-style 'pseudo starting address' to be printed in parentheses
      after any true address.  */
   intmax_t pseudo_start IF_LINT ( = 0);
@@ -1670,15 +1669,6 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
-  n_specs = 0;
-  n_specs_allocated = 0;
-  spec = nullptr;
-
-  format_address = format_address_std;
-  address_base = 8;
-  address_pad_len = 7;
-  flag_dump_strings = false;
-
   while (true)
     {
       intmax_t tmp;
@@ -1731,8 +1721,6 @@ main (int argc, char **argv)
 
         case 'N':
           modern = true;
-          limit_bytes_to_format = true;
-
           s_err = xstr2nonneg (optarg, 0, &max_bytes_to_format, multipliers);
           if (s_err != LONGINT_OK)
             xstrtol_fatal (s_err, oi, c, long_options, optarg);
@@ -1946,7 +1934,7 @@ main (int argc, char **argv)
         format_address = format_address_label;
     }
 
-  if (limit_bytes_to_format
+  if (0 <= max_bytes_to_format
       && ckd_add (&end_offset, n_bytes_to_skip, max_bytes_to_format))
     error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large"));
 
-- 
2.50.0

From 9aab4fb287e4e0b0778337ed9c5316a153993f1c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 14:01:18 -0700
Subject: [PATCH 15/19] od: simplify away one loop copy

* src/od.c (dump): Coalesce two loops into one.
---
 src/od.c | 55 +++++++++++++++++++------------------------------------
 1 file changed, 19 insertions(+), 36 deletions(-)

diff --git a/src/od.c b/src/od.c
index 563baf0b1..847eb6d74 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1467,7 +1467,6 @@ static bool
 dump (void)
 {
   char *block[2];
-  intmax_t current_offset;
   bool idx = false;
   bool ok = true;
   idx_t n_bytes_read;
@@ -1475,46 +1474,30 @@ dump (void)
   block[0] = xinmalloc (2, bytes_per_block);
   block[1] = block[0] + bytes_per_block;
 
-  current_offset = n_bytes_to_skip;
+  intmax_t current_offset = n_bytes_to_skip;
 
-  if (0 <= end_offset)
-    {
-      while (ok)
-        {
-          if (current_offset >= end_offset)
-            {
-              n_bytes_read = 0;
-              break;
-            }
-          idx_t n_needed = MIN (end_offset - current_offset, bytes_per_block);
-          ok &= read_block (n_needed, block[idx], &n_bytes_read);
-          if (n_bytes_read < bytes_per_block)
-            break;
-          affirm (n_bytes_read == bytes_per_block);
-          write_block (current_offset, n_bytes_read,
-                       block[!idx], block[idx]);
-          if (ferror (stdout))
-            ok = false;
-          current_offset += n_bytes_read;
-          idx = !idx;
-        }
-    }
-  else
+  do
     {
-      while (ok)
+      intmax_t needed_bound
+        = end_offset < 0 ? INTMAX_MAX : end_offset - current_offset;
+      if (needed_bound <= 0)
         {
-          ok &= read_block (bytes_per_block, block[idx], &n_bytes_read);
-          if (n_bytes_read < bytes_per_block)
-            break;
-          affirm (n_bytes_read == bytes_per_block);
-          write_block (current_offset, n_bytes_read,
-                       block[!idx], block[idx]);
-          if (ferror (stdout))
-            ok = false;
-          current_offset += n_bytes_read;
-          idx = !idx;
+          n_bytes_read = 0;
+          break;
         }
+      idx_t n_needed = MIN (bytes_per_block, needed_bound);
+      ok &= read_block (n_needed, block[idx], &n_bytes_read);
+      if (n_bytes_read < bytes_per_block)
+        break;
+      affirm (n_bytes_read == bytes_per_block);
+      write_block (current_offset, n_bytes_read,
+                   block[!idx], block[idx]);
+      if (ferror (stdout))
+        ok = false;
+      current_offset += n_bytes_read;
+      idx = !idx;
     }
+  while (ok);
 
   if (n_bytes_read > 0)
     {
-- 
2.50.0

From 984dcc37f2e821e20f747d387f1e9d6a2a31f448 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 15:04:30 -0700
Subject: [PATCH 16/19] od: check sign bit more often

* src/od.c (read_char, dump_strings, main):
Instead of testing for an exact negative number,
just look at the sign bit.  This is a very minor tweak.
---
 src/od.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/od.c b/src/od.c
index 847eb6d74..6d9c5d069 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1314,15 +1314,9 @@ read_char (int *c)
 
   *c = EOF;
 
-  while (in_stream != nullptr)	/* EOF.  */
+  while (in_stream && (*c = fgetc (in_stream)) < 0)
     {
-      *c = fgetc (in_stream);
-
-      if (*c != EOF)
-        break;
-
       ok &= check_and_close (errno);
-
       ok &= open_next_file ();
     }
 
@@ -1568,7 +1562,7 @@ dump_strings (void)
             }
         }
 
-      if (c == -1 || i - !c < string_min)
+      if (c < 0 || i - !c < string_min)
         continue;
 
       buf[i] = 0;
@@ -2009,7 +2003,7 @@ main (int argc, char **argv)
 
 cleanup:
 
-  if (have_read_stdin && fclose (stdin) == EOF)
+  if (have_read_stdin && fclose (stdin) < 0)
     error (EXIT_FAILURE, errno, _("standard input"));
 
   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
-- 
2.50.0

From 667be269edb23b855c15bf13bf01238dd2bf470b Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 15:21:18 -0700
Subject: [PATCH 17/19] od: speed up -S

* src/od.c (read_char): Use getc, not fgetc.
---
 src/od.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/od.c b/src/od.c
index 6d9c5d069..b3af4c72f 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1314,7 +1314,7 @@ read_char (int *c)
 
   *c = EOF;
 
-  while (in_stream && (*c = fgetc (in_stream)) < 0)
+  while (in_stream && (*c = getc (in_stream)) < 0)
     {
       ok &= check_and_close (errno);
       ok &= open_next_file ();
-- 
2.50.0

From d5ea5e8aed304337489a4dbbaf0f00d40bd92fc6 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 17:36:04 -0700
Subject: [PATCH 18/19] od: fix integer overflow with large pseudos

* src/od.c (format_address_label): Diagnose overflow.
---
 src/od.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/od.c b/src/od.c
index b3af4c72f..c3c76cc86 100644
--- a/src/od.c
+++ b/src/od.c
@@ -1227,7 +1227,12 @@ static void
 format_address_label (intmax_t address, char c)
 {
   format_address_std (address, ' ');
-  format_address_paren (address + pseudo_offset, c);
+
+  intmax_t addr;
+  if (ckd_add (&addr, address, pseudo_offset))
+    error (EXIT_FAILURE, 0, _("pseudo address too large for input"));
+
+  format_address_paren (addr, c);
 }
 
 /* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
-- 
2.50.0

From 25454fa34dc6d27239f8ad0b1dda6e2270eece62 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 28 Jun 2025 20:57:00 -0700
Subject: [PATCH 19/19] od: be more consistent re sizeof
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/od.c (width_bytes, decode_one_format): Don’t assume a signed
type has the same size as the corresponding unsigned type.
This has no effect on practical platforms; it’s just for
consistency there.
---
 src/od.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/od.c b/src/od.c
index c3c76cc86..50319aa83 100644
--- a/src/od.c
+++ b/src/od.c
@@ -131,15 +131,15 @@ struct tspec
 static const int width_bytes[] =
 {
   -1,
-  sizeof (char),
+  sizeof (unsigned char),
 #if UCHAR_MAX < USHRT_MAX
-  sizeof (short int),
+  sizeof (unsigned short int),
 #endif
 #if USHRT_MAX < UINT_MAX
-  sizeof (int),
+  sizeof (unsigned int),
 #endif
 #if UINT_MAX < ULONG_MAX
-  sizeof (long int),
+  sizeof (unsigned long int),
 #endif
 #if ULONG_MAX < ULLONG_MAX
   sizeof (unsigned long long int),
@@ -712,22 +712,22 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
         {
         case 'C':
           ++s;
-          size = sizeof (char);
+          size = sizeof (unsigned char);
           break;
 
         case 'S':
           ++s;
-          size = sizeof (short int);
+          size = sizeof (unsigned short int);
           break;
 
         case 'I':
           ++s;
-          size = sizeof (int);
+          size = sizeof (unsigned int);
           break;
 
         case 'L':
           ++s;
-          size = sizeof (long int);
+          size = sizeof (unsigned long int);
           break;
 
         default:
@@ -740,7 +740,7 @@ decode_one_format (char const *s_orig, char const *s, char const **next,
               return false;
             }
           if (p == s)
-            size = sizeof (int);
+            size = sizeof (unsigned int);
           else
             {
               if (ARRAY_CARDINALITY (integral_type_size) <= size
-- 
2.50.0

Reply via email to