On 5/3/20 2:58 AM, Bruno Haible wrote:
> Unfortunately, this patch does not fix the test failure on FreeBSD 12.0.

There are some other places where the m4 code assumes wrapv arithmetic (or
worse, where it messes up in other ways on integer overflow). Attached is a
patch that fixes some of the problems I found. I expect this'll fix the bug you
ran into.

Silent wraparound is not good, and m4's builtins should either diagnose integer
overflow, or (better) m4 should use GMP so that there's no practical limit. But
that's a bigger project.
>From b6502d7a990cc834bdcf2154f563da68a2ccc2b4 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 3 May 2020 18:32:57 -0700
Subject: [PATCH] Fix integer overflow bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem reported by Bruno Haible in:
https://lists.gnu.org/r/bug-m4/2020-05/msg00001.html
I found some other problems while fixing this one.
* src/builtin.c: Include intprops.h.
(numeric_arg): Yield long, not int, so that string lengths
need not fit in int.  All callers changed.
Don’t suppress overflow warning merely because the
arg has leading whitespace.
It’s possible to have numeric overflow and whitespace both.
(m4_eval, m4_incr, m4_decr):
Do not assume wrapv integer overflow.
(m4_substr): Fix integer overflow bug.
* src/eval.c: Include intprops.h
(eval_lex): Do not assume wrapv integer overflow.
---
 src/builtin.c | 65 ++++++++++++++++++++++++++++++---------------------
 src/eval.c    |  8 +++++--
 2 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/src/builtin.c b/src/builtin.c
index 82a92f68..20535bf3 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -25,6 +25,7 @@
 #include "m4.h"
 
 #include "execute.h"
+#include "intprops.h"
 #include "memchr2.h"
 #include "progname.h"
 #include "regex.h"
@@ -407,7 +408,7 @@ bad_argc (token_data *name, int argc, int min, int max)
 `-----------------------------------------------------------------*/
 
 static bool
-numeric_arg (token_data *macro, const char *arg, int *valuep)
+numeric_arg (token_data *macro, const char *arg, long int *valuep)
 {
   char *endp;
 
@@ -429,13 +430,13 @@ numeric_arg (token_data *macro, const char *arg, int *valuep)
                     TOKEN_DATA_TEXT (macro)));
           return false;
         }
-      if (isspace (to_uchar (*arg)))
+      if (errno == ERANGE)
         M4ERROR ((warning_status, 0,
-                  "leading whitespace ignored in builtin `%s'",
+                  "numeric overflow detected in builtin `%s'",
                   TOKEN_DATA_TEXT (macro)));
-      else if (errno == ERANGE)
+      if (isspace (to_uchar (*arg)))
         M4ERROR ((warning_status, 0,
-                  "numeric overflow detected in builtin `%s'",
+                  "leading whitespace ignored in builtin `%s'",
                   TOKEN_DATA_TEXT (macro)));
     }
   return true;
@@ -1085,8 +1086,8 @@ static void
 m4_eval (struct obstack *obs, int argc, token_data **argv)
 {
   int32_t value = 0;
-  int radix = 10;
-  int min = 1;
+  long int radix = 10;
+  long int min = 1;
   const char *s;
 
   if (bad_argc (argv[0], argc, 2, 4))
@@ -1095,10 +1096,10 @@ m4_eval (struct obstack *obs, int argc, token_data **argv)
   if (*ARG (2) && !numeric_arg (argv[0], ARG (2), &radix))
     return;
 
-  if (radix < 1 || radix > (int) strlen (digits))
+  if (radix < 1 || (unsigned long int) radix > strlen (digits))
     {
       M4ERROR ((warning_status, 0,
-                "radix %d in builtin `%s' out of range",
+                "radix %ld in builtin `%s' out of range",
                 radix, ARG (0)));
       return;
     }
@@ -1123,13 +1124,20 @@ m4_eval (struct obstack *obs, int argc, token_data **argv)
       if (value < 0)
         {
           obstack_1grow (obs, '-');
-          value = -value;
+          while (-(min--) < value)
+            obstack_1grow (obs, '0');
+
+          do
+            obstack_1grow (obs, '1');
+          while (++value != 0);
+        }
+      else
+        {
+          while (value < min--)
+            obstack_1grow (obs, '0');
+          while (value-- != 0)
+            obstack_1grow (obs, '1');
         }
-      /* This assumes 2's-complement for correctly handling INT_MIN.  */
-      while (min-- - value > 0)
-        obstack_1grow (obs, '0');
-      while (value-- != 0)
-        obstack_1grow (obs, '1');
       obstack_1grow (obs, '\0');
       return;
     }
@@ -1150,7 +1158,8 @@ m4_eval (struct obstack *obs, int argc, token_data **argv)
 static void
 m4_incr (struct obstack *obs, int argc, token_data **argv)
 {
-  int value;
+  long int value;
+  int32_t value32;
 
   if (bad_argc (argv[0], argc, 2, 2))
     return;
@@ -1158,13 +1167,15 @@ m4_incr (struct obstack *obs, int argc, token_data **argv)
   if (!numeric_arg (argv[0], ARG (1), &value))
     return;
 
-  shipout_int (obs, value + 1);
+  INT_ADD_WRAPV (value, 1, &value32);
+  shipout_int (obs, value32);
 }
 
 static void
 m4_decr (struct obstack *obs, int argc, token_data **argv)
 {
-  int value;
+  long int value;
+  int32_t value32;
 
   if (bad_argc (argv[0], argc, 2, 2))
     return;
@@ -1172,7 +1183,8 @@ m4_decr (struct obstack *obs, int argc, token_data **argv)
   if (!numeric_arg (argv[0], ARG (1), &value))
     return;
 
-  shipout_int (obs, value - 1);
+  INT_SUBTRACT_WRAPV (value, 1, &value32);
+  shipout_int (obs, value32);
 }
 
 /* This section contains the macros "divert", "undivert" and "divnum" for
@@ -1186,12 +1198,13 @@ m4_decr (struct obstack *obs, int argc, token_data **argv)
 static void
 m4_divert (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv)
 {
-  int i = 0;
+  long int i = 0;
 
   if (bad_argc (argv[0], argc, 1, 2))
     return;
 
-  if (argc >= 2 && !numeric_arg (argv[0], ARG (1), &i))
+  if (argc >= 2 && ! (numeric_arg (argv[0], ARG (1), &i)
+                      && INT_MIN <= i && i <= INT_MAX))
     return;
 
   make_diversion (i);
@@ -1538,7 +1551,7 @@ m4___program__ (struct obstack *obs, int argc, token_data **argv)
 static void M4_GNUC_NORETURN
 m4_m4exit (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv)
 {
-  int exit_code = EXIT_SUCCESS;
+  long int exit_code = EXIT_SUCCESS;
 
   /* Warn on bad arguments, but still exit.  */
   bad_argc (argv[0], argc, 1, 2);
@@ -1547,7 +1560,7 @@ m4_m4exit (struct obstack *obs M4_GNUC_UNUSED, int argc, token_data **argv)
   if (exit_code < 0 || exit_code > 255)
     {
       M4ERROR ((warning_status, 0,
-                "exit status out of range: `%d'", exit_code));
+                "exit status out of range: `%ld'", exit_code));
       exit_code = EXIT_FAILURE;
     }
   /* Change debug stream back to stderr, to force flushing debug stream and
@@ -1769,8 +1782,8 @@ m4_index (struct obstack *obs, int argc, token_data **argv)
 static void
 m4_substr (struct obstack *obs, int argc, token_data **argv)
 {
-  int start = 0;
-  int length, avail;
+  long int start = 0;
+  long int length, avail;
 
   if (bad_argc (argv[0], argc, 3, 4))
     {
@@ -1790,7 +1803,7 @@ m4_substr (struct obstack *obs, int argc, token_data **argv)
   if (start < 0 || length <= 0 || start >= avail)
     return;
 
-  if (start + length > avail)
+  if (avail - start < length)
     length = avail - start;
   obstack_grow (obs, ARG (1) + start, length);
 }
diff --git a/src/eval.c b/src/eval.c
index 7f286a10..c4fd2d36 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -25,6 +25,7 @@
    is evaluate ().  */
 
 #include "m4.h"
+#include "intprops.h"
 
 /* Evaluates token types.  */
 
@@ -152,7 +153,7 @@ eval_lex (int32_t *val)
       else
         base = 10;
 
-      /* FIXME - this calculation can overflow.  Consider xstrtol.  */
+      /* FIXME - this calculation can overflow, with no diagnostic.  */
       *val = 0;
       for (; *eval_text; eval_text++)
         {
@@ -177,7 +178,10 @@ eval_lex (int32_t *val)
           else if (digit >= base)
             break;
           else
-            *val = *val * base + digit;
+            {
+              INT_MULTIPLY_WRAPV (*val, base, val);
+              INT_ADD_WRAPV (*val, digit, val);
+            }
         }
       return NUMBER;
     }
-- 
2.17.1

Reply via email to