The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=d9dc1603d6e48cca84cad3ebe859129131b8387c

commit d9dc1603d6e48cca84cad3ebe859129131b8387c
Author:     Dag-Erling Smørgrav <[email protected]>
AuthorDate: 2023-08-28 15:32:23 +0000
Commit:     Dag-Erling Smørgrav <[email protected]>
CommitDate: 2023-08-28 15:33:51 +0000

    libc: Implement N2630.
    
    This adds formatted input/output of binary integer numbers to the
    printf(), scanf(), and strtol() families, including their
    wide-character counterparts.
    
    Reviewed by:    imp, emaste
    Differential Revision:  https://reviews.freebsd.org/D41511
---
 lib/libc/iconv/_strtol.h      |   7 ++
 lib/libc/iconv/_strtoul.h     |   7 ++
 lib/libc/locale/wcstoimax.c   |   7 ++
 lib/libc/locale/wcstol.c      |   7 ++
 lib/libc/locale/wcstoll.c     |   7 ++
 lib/libc/locale/wcstoul.c     |   7 ++
 lib/libc/locale/wcstoull.c    |   7 ++
 lib/libc/locale/wcstoumax.c   |   7 ++
 lib/libc/stdio/printfcommon.h |  14 +++
 lib/libc/stdio/vfprintf.c     |  13 ++
 lib/libc/stdio/vfscanf.c      | 267 +++++++++++++++++++++++-------------------
 lib/libc/stdio/vfwprintf.c    |  13 ++
 lib/libc/stdio/vfwscanf.c     | 263 +++++++++++++++++++++++------------------
 lib/libc/stdlib/strtoimax.c   |   7 ++
 lib/libc/stdlib/strtol.c      |   7 ++
 lib/libc/stdlib/strtoll.c     |  12 +-
 lib/libc/stdlib/strtoul.c     |   7 ++
 lib/libc/stdlib/strtoull.c    |   7 ++
 lib/libc/stdlib/strtoumax.c   |   7 ++
 19 files changed, 436 insertions(+), 237 deletions(-)

diff --git a/lib/libc/iconv/_strtol.h b/lib/libc/iconv/_strtol.h
index d183edbe8c3a..94a13c56db98 100644
--- a/lib/libc/iconv/_strtol.h
+++ b/lib/libc/iconv/_strtol.h
@@ -91,6 +91,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == '0' && (*s == 'b' || *s == 'B') &&
+           (s[1] >= '0' && s[1] <= '1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/iconv/_strtoul.h b/lib/libc/iconv/_strtoul.h
index eade72e9c2e6..4944e1fb06e0 100644
--- a/lib/libc/iconv/_strtoul.h
+++ b/lib/libc/iconv/_strtoul.h
@@ -87,6 +87,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == '0' && (*s == 'b' || *s == 'B') &&
+           (s[1] >= '0' && s[1] <= '1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/locale/wcstoimax.c b/lib/libc/locale/wcstoimax.c
index 259faa2b011c..5ed949cd0531 100644
--- a/lib/libc/locale/wcstoimax.c
+++ b/lib/libc/locale/wcstoimax.c
@@ -86,6 +86,13 @@ wcstoimax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/locale/wcstol.c b/lib/libc/locale/wcstol.c
index b0b787384f39..1678b615ca1c 100644
--- a/lib/libc/locale/wcstol.c
+++ b/lib/libc/locale/wcstol.c
@@ -80,6 +80,13 @@ wcstol_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr, int
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/locale/wcstoll.c b/lib/libc/locale/wcstoll.c
index ac07d6c6adbf..ef1e6ef58861 100644
--- a/lib/libc/locale/wcstoll.c
+++ b/lib/libc/locale/wcstoll.c
@@ -86,6 +86,13 @@ wcstoll_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/locale/wcstoul.c b/lib/libc/locale/wcstoul.c
index 9f58db799c0e..2c9c8820b1f6 100644
--- a/lib/libc/locale/wcstoul.c
+++ b/lib/libc/locale/wcstoul.c
@@ -80,6 +80,13 @@ wcstoul_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/locale/wcstoull.c b/lib/libc/locale/wcstoull.c
index cbc7253f884d..692eb90eef6b 100644
--- a/lib/libc/locale/wcstoull.c
+++ b/lib/libc/locale/wcstoull.c
@@ -86,6 +86,13 @@ wcstoull_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/locale/wcstoumax.c b/lib/libc/locale/wcstoumax.c
index 4380cccf2424..c4f2ec3aaf41 100644
--- a/lib/libc/locale/wcstoumax.c
+++ b/lib/libc/locale/wcstoumax.c
@@ -86,6 +86,13 @@ wcstoumax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == L'0' && (*s == L'b' || *s == L'B') &&
+           (s[1] >= L'0' && s[1] <= L'1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == L'0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h
index ac5aed0a5fcd..411b778dc234 100644
--- a/lib/libc/stdio/printfcommon.h
+++ b/lib/libc/stdio/printfcommon.h
@@ -194,6 +194,13 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
                } while (sval != 0);
                break;
 
+       case 2:
+               do {
+                       *--cp = to_char(val & 1);
+                       val >>= 1;
+               } while (val);
+               break;
+
        case 8:
                do {
                        *--cp = to_char(val & 7);
@@ -244,6 +251,13 @@ __ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
                } while (sval != 0);
                break;
 
+       case 2:
+               do {
+                       *--cp = to_char(val & 1);
+                       val >>= 1;
+               } while (val);
+               break;
+
        case 8:
                do {
                        *--cp = to_char(val & 7);
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index ad655c5d78d4..5e5a9b5e31c1 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -613,6 +613,19 @@ reswitch:  switch (ch) {
                case 'z':
                        flags |= SIZET;
                        goto rflag;
+               case 'B':
+               case 'b':
+                       if (flags & INTMAX_SIZE)
+                               ujval = UJARG();
+                       else
+                               ulval = UARG();
+                       base = 2;
+                       /* leading 0b/B only if non-zero */
+                       if (flags & ALT &&
+                           (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+                               ox[1] = ch;
+                       goto nosign;
+                       break;
                case 'C':
                        flags |= LONGINT;
                        /*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index cc2e1e428321..b4db62c216ed 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -6,6 +6,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -80,16 +82,6 @@ static char sccsid[] = "@(#)vfscanf.c        8.1 (Berkeley) 6/4/93";
 #define        SHORTSHORT      0x4000  /* hh: char */
 #define        UNSIGNED        0x8000  /* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define        SIGNOK          0x40    /* +/- is (still) legal */
-#define        NDIGITS         0x80    /* no digits detected */
-#define        PFXOK           0x100   /* 0x prefix is (still) legal */
-#define        NZDIGITS        0x200   /* no zero digits detected */
-#define        HAVESIGN        0x10000 /* sign detected */
-
 /*
  * Conversion types.
  */
@@ -307,129 +299,160 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
        return (n);
 }
 
+enum parseint_state {
+       begin,
+       havesign,
+       havezero,
+       haveprefix,
+       any,
+};
+
+static __inline int
+parseint_fsm(int c, enum parseint_state *state, int *base)
+{
+       switch (c) {
+       case '+':
+       case '-':
+               if (*state == begin) {
+                       *state = havesign;
+                       return 1;
+               }
+               break;
+       case '0':
+               if (*state == begin || *state == havesign) {
+                       *state = havezero;
+               } else {
+                       *state = any;
+               }
+               return 1;
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+               if (*state == havezero && *base == 0) {
+                       *base = 8;
+               }
+               /* FALL THROUGH */
+       case '8':
+       case '9':
+               if (*state == begin ||
+                   *state == havesign) {
+                       if (*base == 0) {
+                               *base = 10;
+                       }
+               }
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - '0') {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'b':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 2) {
+                               *state = haveprefix;
+                               *base = 2;
+                               return 1;
+                       }
+               }
+               /* FALL THROUGH */
+       case 'a':
+       case 'c':
+       case 'd':
+       case 'e':
+       case 'f':
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - 'a' + 10) {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'B':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 2) {
+                               *state = haveprefix;
+                               *base = 2;
+                               return 1;
+                       }
+               }
+               /* FALL THROUGH */
+       case 'A':
+       case 'C':
+       case 'D':
+       case 'E':
+       case 'F':
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - 'A' + 10) {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'x':
+       case 'X':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 16) {
+                               *state = haveprefix;
+                               *base = 16;
+                               return 1;
+                       }
+               }
+               break;
+       }
+       return 0;
+}
+
 /*
- * Read an integer, storing it in buf.  The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
  *
  * Return 0 on a match failure, and the number of characters read
  * otherwise.
  */
 static __inline int
-parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
+parseint(FILE *fp, char * __restrict buf, int width, int base)
 {
-       /* `basefix' is used to avoid `if' tests */
-       static const short basefix[17] =
-               { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+       enum parseint_state state = begin;
        char *p;
        int c;
 
-       flags |= SIGNOK | NDIGITS | NZDIGITS;
        for (p = buf; width; width--) {
-               c = *fp->_p;
-               /*
-                * Switch on the character; `goto ok' if we accept it
-                * as a part of number.
-                */
-               switch (c) {
-
-               /*
-                * The digit 0 is always legal, but is special.  For
-                * %i conversions, if no digits (zero or nonzero) have
-                * been scanned (only signs), we will have base==0.
-                * In that case, we should set it to 8 and enable 0x
-                * prefixing.  Also, if we have not scanned zero
-                * digits before this, do not turn off prefixing
-                * (someone else will turn it off if we have scanned
-                * any nonzero digits).
-                */
-               case '0':
-                       if (base == 0) {
-                               base = 8;
-                               flags |= PFXOK;
-                       }
-                       if (flags & NZDIGITS)
-                               flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
-                       else
-                               flags &= ~(SIGNOK|PFXOK|NDIGITS);
-                       goto ok;
-
-               /* 1 through 7 always legal */
-               case '1': case '2': case '3':
-               case '4': case '5': case '6': case '7':
-                       base = basefix[base];
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* digits 8 and 9 ok iff decimal or hex */
-               case '8': case '9':
-                       base = basefix[base];
-                       if (base <= 8)
-                               break;  /* not legal here */
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* letters ok iff hex */
-               case 'A': case 'B': case 'C':
-               case 'D': case 'E': case 'F':
-               case 'a': case 'b': case 'c':
-               case 'd': case 'e': case 'f':
-                       /* no need to fix base here */
-                       if (base <= 10)
-                               break;  /* not legal here */
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* sign ok only as first character */
-               case '+': case '-':
-                       if (flags & SIGNOK) {
-                               flags &= ~SIGNOK;
-                               flags |= HAVESIGN;
-                               goto ok;
-                       }
+               c = __sgetc(fp);
+               if (c == EOF)
                        break;
-
-               /*
-                * x ok iff flag still set & 2nd char (or 3rd char if
-                * we have a sign).
-                */
-               case 'x': case 'X':
-                       if (flags & PFXOK && p ==
-                           buf + 1 + !!(flags & HAVESIGN)) {
-                               base = 16;      /* if %i */
-                               flags &= ~PFXOK;
-                               goto ok;
-                       }
+               if (!parseint_fsm(c, &state, &base))
                        break;
-               }
-
-               /*
-                * If we got here, c is not a legal character for a
-                * number.  Stop accumulating digits.
-                */
-               break;
-       ok:
-               /*
-                * c is legal: store it and look at the next.
-                */
                *p++ = c;
-               if (--fp->_r > 0)
-                       fp->_p++;
-               else if (__srefill(fp))
-                       break;          /* EOF */
        }
        /*
-        * If we had only a sign, it is no good; push back the sign.
-        * If the number ends in `x', it was [sign] '0' 'x', so push
-        * back the x and treat it as [sign] '0'.
+        * If we only had a sign, push it back.  If we only had a 0b or 0x
+        * prefix (possibly preceded by a sign), we view it as "0" and
+        * push back the letter.  In all other cases, if we stopped
+        * because we read a non-number character, push it back.
         */
-       if (flags & NDIGITS) {
-               if (p > buf)
-                       (void) __ungetc(*(u_char *)--p, fp);
-               return (0);
-       }
-       c = ((u_char *)p)[-1];
-       if (c == 'x' || c == 'X') {
-               --p;
+       if (state == havesign) {
+               p--;
+               (void) __ungetc(*(u_char *)p, fp);
+       } else if (state == haveprefix) {
+               p--;
+               (void) __ungetc(c, fp);
+       } else if (c != EOF) {
                (void) __ungetc(c, fp);
        }
        return (p - buf);
@@ -554,6 +577,13 @@ literal:
                /*
                 * Conversions.
                 */
+               case 'B':
+               case 'b':
+                       c = CT_INT;
+                       flags |= UNSIGNED;
+                       base = 2;
+                       break;
+
                case 'd':
                        c = CT_INT;
                        base = 10;
@@ -578,7 +608,6 @@ literal:
 
                case 'X':
                case 'x':
-                       flags |= PFXOK; /* enable 0x prefixing */
                        c = CT_INT;
                        flags |= UNSIGNED;
                        base = 16;
@@ -613,7 +642,7 @@ literal:
                        break;
 
                case 'p':       /* pointer format is like hex */
-                       flags |= POINTER | PFXOK;
+                       flags |= POINTER;
                        c = CT_INT;             /* assumes sizeof(uintmax_t) */
                        flags |= UNSIGNED;      /*      >= sizeof(uintptr_t) */
                        base = 16;
@@ -738,7 +767,7 @@ literal:
                                width = sizeof(buf) - 2;
                        width++;
 #endif
-                       nr = parseint(fp, buf, width, base, flags);
+                       nr = parseint(fp, buf, width, base);
                        if (nr == 0)
                                goto match_failure;
                        if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index fc681e8d0575..259a86467ea7 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -684,6 +684,19 @@ reswitch:  switch (ch) {
                case 'z':
                        flags |= SIZET;
                        goto rflag;
+               case 'B':
+               case 'b':
+                       if (flags & INTMAX_SIZE)
+                               ujval = UJARG();
+                       else
+                               ulval = UARG();
+                       base = 2;
+                       /* leading 0b/B only if non-zero */
+                       if (flags & ALT &&
+                           (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+                               ox[1] = ch;
+                       goto nosign;
+                       break;
                case 'C':
                        flags |= LONGINT;
                        /*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c
index 1a28ff665247..3300751eafb6 100644
--- a/lib/libc/stdio/vfwscanf.c
+++ b/lib/libc/stdio/vfwscanf.c
@@ -9,6 +9,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -78,16 +80,6 @@ static char sccsid[] = "@(#)vfscanf.c        8.1 (Berkeley) 6/4/93";
 #define        SHORTSHORT      0x4000  /* hh: char */
 #define        UNSIGNED        0x8000  /* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define        SIGNOK          0x40    /* +/- is (still) legal */
-#define        NDIGITS         0x80    /* no digits detected */
-#define        PFXOK           0x100   /* 0x prefix is (still) legal */
-#define        NZDIGITS        0x200   /* no zero digits detected */
-#define        HAVESIGN        0x10000 /* sign detected */
-
 /*
  * Conversion types.
  */
@@ -289,128 +281,161 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
        return (nread);
 }
 
+enum parseint_state {
+       begin,
+       havesign,
+       havezero,
+       haveprefix,
+       any,
+};
+
+static __inline int
+parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
+{
+       switch (c) {
+       case '+':
+       case '-':
+               if (*state == begin) {
+                       *state = havesign;
+                       return 1;
+               }
+               break;
+       case '0':
+               if (*state == begin || *state == havesign) {
+                       *state = havezero;
+               } else {
+                       *state = any;
+               }
+               return 1;
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+               if (*state == havezero && *base == 0) {
+                       *base = 8;
+               }
+               /* FALL THROUGH */
+       case '8':
+       case '9':
+               if (*state == begin ||
+                   *state == havesign) {
+                       if (*base == 0) {
+                               *base = 10;
+                       }
+               }
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - '0') {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'b':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 2) {
+                               *state = haveprefix;
+                               *base = 2;
+                               return 1;
+                       }
+               }
+               /* FALL THROUGH */
+       case 'a':
+       case 'c':
+       case 'd':
+       case 'e':
+       case 'f':
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - 'a' + 10) {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'B':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 2) {
+                               *state = haveprefix;
+                               *base = 2;
+                               return 1;
+                       }
+               }
+               /* FALL THROUGH */
+       case 'A':
+       case 'C':
+       case 'D':
+       case 'E':
+       case 'F':
+               if (*state == begin ||
+                   *state == havesign ||
+                   *state == havezero ||
+                   *state == haveprefix ||
+                   *state == any) {
+                       if (*base > c - 'A' + 10) {
+                               *state = any;
+                               return 1;
+                       }
+               }
+               break;
+       case 'x':
+       case 'X':
+               if (*state == havezero) {
+                       if (*base == 0 || *base == 16) {
+                               *state = haveprefix;
+                               *base = 16;
+                               return 1;
+                       }
+               }
+               break;
+       }
+       return 0;
+}
+
 /*
- * Read an integer, storing it in buf.  The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
  *
  * Return 0 on a match failure, and the number of characters read
  * otherwise.
  */
 static __inline int
-parseint(FILE *fp, wchar_t *buf, int width, int base, int flags,
+parseint(FILE *fp, wchar_t * __restrict buf, int width, int base,
     locale_t locale)
 {
-       /* `basefix' is used to avoid `if' tests */
-       static const short basefix[17] =
-               { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+       enum parseint_state state = begin;
        wchar_t *wcp;
        int c;
 
-       flags |= SIGNOK | NDIGITS | NZDIGITS;
        for (wcp = buf; width; width--) {
                c = __fgetwc(fp, locale);
-               /*
-                * Switch on the character; `goto ok' if we accept it
-                * as a part of number.
-                */
-               switch (c) {
-
-               /*
-                * The digit 0 is always legal, but is special.  For
-                * %i conversions, if no digits (zero or nonzero) have
-                * been scanned (only signs), we will have base==0.
-                * In that case, we should set it to 8 and enable 0x
-                * prefixing.  Also, if we have not scanned zero
-                * digits before this, do not turn off prefixing
-                * (someone else will turn it off if we have scanned
-                * any nonzero digits).
-                */
-               case '0':
-                       if (base == 0) {
-                               base = 8;
-                               flags |= PFXOK;
-                       }
-                       if (flags & NZDIGITS)
-                               flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
-                       else
-                               flags &= ~(SIGNOK|PFXOK|NDIGITS);
-                       goto ok;
-
-               /* 1 through 7 always legal */
-               case '1': case '2': case '3':
-               case '4': case '5': case '6': case '7':
-                       base = basefix[base];
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* digits 8 and 9 ok iff decimal or hex */
-               case '8': case '9':
-                       base = basefix[base];
-                       if (base <= 8)
-                               break;  /* not legal here */
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* letters ok iff hex */
-               case 'A': case 'B': case 'C':
-               case 'D': case 'E': case 'F':
-               case 'a': case 'b': case 'c':
-               case 'd': case 'e': case 'f':
-                       /* no need to fix base here */
-                       if (base <= 10)
-                               break;  /* not legal here */
-                       flags &= ~(SIGNOK | PFXOK | NDIGITS);
-                       goto ok;
-
-               /* sign ok only as first character */
-               case '+': case '-':
-                       if (flags & SIGNOK) {
-                               flags &= ~SIGNOK;
-                               flags |= HAVESIGN;
-                               goto ok;
-                       }
+               if (c == WEOF)
                        break;
-
-               /*
-                * x ok iff flag still set & 2nd char (or 3rd char if
-                * we have a sign).
-                */
-               case 'x': case 'X':
-                       if (flags & PFXOK && wcp ==
-                           buf + 1 + !!(flags & HAVESIGN)) {
-                               base = 16;      /* if %i */
-                               flags &= ~PFXOK;
-                               goto ok;
-                       }
+               if (!parseint_fsm(c, &state, &base))
                        break;
-               }
-
-               /*
-                * If we got here, c is not a legal character for a
-                * number.  Stop accumulating digits.
-                */
-               if (c != WEOF)
-                       __ungetwc(c, fp, locale);
-               break;
-       ok:
-               /*
-                * c is legal: store it and look at the next.
-                */
                *wcp++ = (wchar_t)c;
        }
        /*
-        * If we had only a sign, it is no good; push back the sign.
-        * If the number ends in `x', it was [sign] '0' 'x', so push
-        * back the x and treat it as [sign] '0'.
+        * If we only had a sign, push it back.  If we only had a 0b or 0x
+        * prefix (possibly preceded by a sign), we view it as "0" and
+        * push back the letter.  In all other cases, if we stopped
+        * because we read a non-number character, push it back.
         */
-       if (flags & NDIGITS) {
-               if (wcp > buf)
-                       __ungetwc(*--wcp, fp, locale);
-               return (0);
-       }
-       c = wcp[-1];
-       if (c == 'x' || c == 'X') {
-               --wcp;
+       if (state == havesign) {
+               wcp--;
+               __ungetwc(*wcp, fp, locale);
+       } else if (state == haveprefix) {
+               wcp--;
+               __ungetwc(c, fp, locale);
+       } else if (c != WEOF) {
                __ungetwc(c, fp, locale);
        }
        return (wcp - buf);
@@ -536,6 +561,13 @@ literal:
                /*
                 * Conversions.
                 */
+               case 'B':
+               case 'b':
+                       c = CT_INT;
+                       flags |= UNSIGNED;
+                       base = 2;
+                       break;
+
                case 'd':
                        c = CT_INT;
                        base = 10;
@@ -560,7 +592,6 @@ literal:
 
                case 'X':
                case 'x':
-                       flags |= PFXOK; /* enable 0x prefixing */
                        c = CT_INT;
                        flags |= UNSIGNED;
                        base = 16;
@@ -606,7 +637,7 @@ literal:
                        break;
 
                case 'p':       /* pointer format is like hex */
-                       flags |= POINTER | PFXOK;
+                       flags |= POINTER;
                        c = CT_INT;             /* assumes sizeof(uintmax_t) */
                        flags |= UNSIGNED;      /*      >= sizeof(uintptr_t) */
                        base = 16;
@@ -716,7 +747,7 @@ literal:
                            sizeof(*buf) - 1)
                                width = sizeof(buf) / sizeof(*buf) - 1;
 
-                       nr = parseint(fp, buf, width, base, flags, locale);
+                       nr = parseint(fp, buf, width, base, locale);
                        if (nr == 0)
                                goto match_failure;
                        if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdlib/strtoimax.c b/lib/libc/stdlib/strtoimax.c
index 894d801940fd..5309b7d4305c 100644
--- a/lib/libc/stdlib/strtoimax.c
+++ b/lib/libc/stdlib/strtoimax.c
@@ -87,6 +87,13 @@ strtoimax_l(const char * __restrict nptr, char ** __restrict endptr, int base,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == '0' && (*s == 'b' || *s == 'B') &&
+           (s[1] >= '0' && s[1] <= '1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == '0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/stdlib/strtol.c b/lib/libc/stdlib/strtol.c
index 360bb7efc8be..1ca95918ef12 100644
--- a/lib/libc/stdlib/strtol.c
+++ b/lib/libc/stdlib/strtol.c
@@ -87,6 +87,13 @@ strtol_l(const char * __restrict nptr, char ** __restrict endptr, int base,
                s += 2;
                base = 16;
        }
+       if ((base == 0 || base == 2) &&
+           c == '0' && (*s == 'b' || *s == 'B') &&
+           (s[1] >= '0' && s[1] <= '1')) {
+               c = s[1];
+               s += 2;
+               base = 2;
+       }
        if (base == 0)
                base = c == '0' ? 8 : 10;
        acc = any = 0;
diff --git a/lib/libc/stdlib/strtoll.c b/lib/libc/stdlib/strtoll.c
index 51a523e51fb8..6845776c5f03 100644
--- a/lib/libc/stdlib/strtoll.c
+++ b/lib/libc/stdlib/strtoll.c
@@ -63,8 +63,9 @@ strtoll_l(const char * __restrict nptr, char ** __restrict endptr, int base,
 
        /*
         * Skip white space and pick up leading +/- sign if any.
-        * If base is 0, allow 0x for hex and 0 for octal, else
-        * assume decimal; if base is already 16, allow 0x.
+        * If base is 0, allow 0b for binary, 0x for hex, and 0 for
+        * octal, else assume decimal; if base is already 2, allow
+        * 0b; if base is already 16, allow 0x.
         */
        s = nptr;
        do {
@@ -87,6 +88,13 @@ strtoll_l(const char * __restrict nptr, char ** __restrict endptr, int base,
*** 67 LINES SKIPPED ***

Reply via email to