kstrto*() and kstrto*_from_user() family of functions were added
help with parsing one integer written as string to proc/sysfs/debugfs
files and pass it elsewhere. But they have a limitation: string passed
must end with \0 or \n\0. There are enough places where kstrto*()
functions can't be used because of this limitation. Trivial example:
parse "%u.%u".

Currently the only way to parse everything is simple_strto*() family of
functions. But they are suboptimal:
* they do not detect overflow,
* there are only 2(4) of them -- long and "long long" version.
  This leads to silent truncation in the most simple case:

        val = strtoul(s, NULL, 0);

* half of the people think that "char **endp" argument is necessary and
  add unnecessary variable.

OpenBSD people, fed up with how complex correct integer parsing is, added
strtonum(3) to fixup for deficiencies of libc-style integer parsing:
http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man3/strtonum.3?query=strtonum&arch=i386

It'd be OK to copy that but it relies on "errno" and fixed strings as
error-reporting channel which I think is not OK for kernel.
strtonum() also doesn't report number of characted consumed.

What to do?

Enter parse_integer().

        int parse_integer(const char *s, unsigned int base, T *val);

Rationale:

* parse_integer() is exactly 1 (one) interface not 2 or many,
  one for each type.
* parse_integer() reports -E errors reliably in a canonical kernel way:

        rv = parse_integer(str, 10, &val);
        if (rv < 0)
                return rv;

* parse_integer() writes result only if there were no errors, at least
  one digit was consumed.
* parse_integer doesn't mix error reporting channel and value channel,
  it does mix error and number-of-character-consumed channel, though.

* parse_integer() reports number of characters consumed, makes parsing
  multiple values easy:

        rv = parse_integer(str, 0, &val1);
        if (rv < 0)
                return rv;
        s += rv;
        if (s != '.')
                return -EINVAL;
        rv = parse_integer(str, 0, &val2);
        if (rv < 0)
                return rv;
        if (str[rv] != '\0')
                return -EINVAL;

There are several deficiencies in parse_integer() compared to strto*():
* can't be used in initializers:

        const T x = strtoul();

* can't be used with bitfields,
* can't be used in experessions:

        x = strtol() * 1024;
        x = y = strtol() * 1024;
        x += strtol();

In defense of parse_integer() all I can say, is that using strtol() in
expression or initializer promotes no error checking and thus probably
should not be encouraged in C, language with no built-in error checking
anyway.

The amount of "x = y = strtol()" expressions in kernel is very small.
The amount of direct usage in expression is not small, but can be
counted as an acceptable loss.

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---

 include/linux/kernel.h |   72 +++++++++++++++++++
 lib/Makefile           |    1 
 lib/kstrtox.c          |   27 +------
 lib/parse-integer.c    |  180 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 257 insertions(+), 23 deletions(-)

--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -251,6 +251,78 @@ void do_exit(long error_code)
 void complete_and_exit(struct completion *, long)
        __noreturn;
 
+/*
+ * int parse_integer(const char *s, unsigned int base, T *val);
+ *
+ * Convert integer string representation to an integer.
+ * If an integer doesn't fit into specified type, -E is returned.
+ *
+ * Conversion to signed integer accepts sign "-".
+ * Conversion to an unsigned integer doesn't accept sign "-".
+ *
+ * Radix 0 means autodetection: leading "0x" implies radix 16,
+ * leading "0" implies radix 8, otherwise radix is 10.
+ * Autodetection hints work after optional sign, but not before.
+ *
+ * If -E is returned, result is not touched.
+ *
+ * "T = char" case is not supported because -f{un,}signed-char can silently
+ * change range of accepted values.
+ */
+#define parse_integer(s, base, val)    \
+({                                     \
+       const char *_s = (s);           \
+       unsigned int _base = (base);    \
+       typeof(val) _val = (val);       \
+                                       \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), signed char *),      \
+       _parse_integer_sc(_s, _base, (void *)_val),                     \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned char *),    \
+       _parse_integer_uc(_s, _base, (void *)_val),                     \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), short *),            \
+       _parse_integer_s(_s, _base, (void *)_val),                      \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned short *),   \
+       _parse_integer_us(_s, _base, (void *)_val),                     \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), int *),              \
+       _parse_integer_i(_s, _base, (void *)_val),                      \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned int *),     \
+       _parse_integer_u(_s, _base, (void *)_val),                      \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), long *) && BITS_PER_LONG == 
32,\
+       _parse_integer_i(_s, _base, (void *)_val),                      \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), long *) && BITS_PER_LONG == 
64,\
+       _parse_integer_ll(_s, _base, (void *)_val),                     \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned long *) && 
BITS_PER_LONG == 32,\
+       _parse_integer_u(_s, _base, (void *)_val),                      \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned long *) && 
BITS_PER_LONG == 64,\
+       _parse_integer_ull(_s, _base, (void *)_val),                    \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), long long *),        \
+       _parse_integer_ll(_s, _base, (void *)_val),                     \
+       __builtin_choose_expr(                                          \
+       __builtin_types_compatible_p(typeof(_val), unsigned long long *),\
+       _parse_integer_ull(_s, _base, (void *)_val),                    \
+       _parse_integer_link_time_error()))))))))))));   \
+})
+int _parse_integer_sc(const char *s, unsigned int base, signed char *val);
+int _parse_integer_uc(const char *s, unsigned int base, unsigned char *val);
+int _parse_integer_s(const char *s, unsigned int base, short *val);
+int _parse_integer_us(const char *s, unsigned int base, unsigned short *val);
+int _parse_integer_i(const char *s, unsigned int base, int *val);
+int _parse_integer_u(const char *s, unsigned int base, unsigned int *val);
+int _parse_integer_ll(const char *s, unsigned int base, long long *val);
+int _parse_integer_ull(const char *s, unsigned int base, unsigned long long 
*val);
+void _parse_integer_link_time_error(void);
+
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long 
*res);
 int __must_check _kstrtol(const char *s, unsigned int base, long *res);
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o
 obj-y += kstrtox.o
+obj-y += parse-integer.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -20,22 +20,6 @@
 #include <asm/uaccess.h>
 #include "kstrtox.h"
 
-const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
-{
-       if (*base == 0) {
-               if (s[0] == '0') {
-                       if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
-                               *base = 16;
-                       else
-                               *base = 8;
-               } else
-                       *base = 10;
-       }
-       if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
-               s += 2;
-       return s;
-}
-
 /*
  * Convert non-negative integer string representation in explicitly given radix
  * to an integer.
@@ -86,14 +70,11 @@ unsigned int _parse_integer(const char *s, unsigned int 
base, unsigned long long
 static int _kstrtoull(const char *s, unsigned int base, unsigned long long 
*res)
 {
        unsigned long long _res;
-       unsigned int rv;
+       int rv;
 
-       s = _parse_integer_fixup_radix(s, &base);
-       rv = _parse_integer(s, base, &_res);
-       if (rv & KSTRTOX_OVERFLOW)
-               return -ERANGE;
-       if (rv == 0)
-               return -EINVAL;
+       rv = parse_integer(s, base, &_res);
+       if (rv < 0)
+               return rv;
        s += rv;
        if (*s == '\n')
                s++;
--- /dev/null
+++ b/lib/parse-integer.c
@@ -0,0 +1,180 @@
+/*
+ * See parse_integer().
+ *
+ * Individual dispatch functions in this file aren't supposed to be used
+ * directly and thus aren't advertised and documented despited being exported.
+ *
+ * Do not use any function in this file for any reason.
+ */
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <asm/bug.h>
+#include "kstrtox.h"
+
+const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+       if (*base == 0) {
+               if (s[0] == '0') {
+                       if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+                               *base = 16;
+                       else
+                               *base = 8;
+               } else
+                       *base = 10;
+       }
+       if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+               s += 2;
+       BUG_ON(*base < 2 || *base > 16);
+       return s;
+}
+
+int _parse_integer_ull(const char *s, unsigned int base, unsigned long long 
*val)
+{
+       const char *s0 = s, *sd;
+       unsigned long long acc;
+
+       s = sd = _parse_integer_fixup_radix(s0, &base);
+       acc = 0;
+       while (*s != '\0') {
+               unsigned int d;
+
+               if ('0' <= *s && *s <= '9')
+                       d = *s - '0';
+               else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
+                       d = _tolower(*s) - 'a' + 10;
+               else
+                       break;
+               if (d >= base)
+                       break;
+               /* Overflow can't happen early enough. */
+               if ((acc >> 60) && acc > div_u64(ULLONG_MAX - d, base))
+                       return -ERANGE;
+               acc = acc * base + d;
+               s++;
+       }
+       /* At least one digit has to be converted. */
+       if (s == sd)
+               return -EINVAL;
+       *val = acc;
+       /* Radix 1 is not supported otherwise returned length can overflow. */
+       return s - s0;
+}
+EXPORT_SYMBOL(_parse_integer_ull);
+
+int _parse_integer_ll(const char *s, unsigned int base, long long *val)
+{
+       unsigned long long tmp;
+       int rv;
+
+       if (*s == '-') {
+               rv = _parse_integer_ull(s + 1, base, &tmp);
+               if (rv < 0)
+                       return rv;
+               if ((long long)-tmp >= 0)
+                       return -ERANGE;
+               *val = -tmp;
+               return rv + 1;
+       } else {
+               rv = _parse_integer_ull(s, base, &tmp);
+               if (rv < 0)
+                       return rv;
+               if ((long long)tmp < 0)
+                       return -ERANGE;
+               *val = tmp;
+               return rv;
+       }
+}
+EXPORT_SYMBOL(_parse_integer_ll);
+
+int _parse_integer_u(const char *s, unsigned int base, unsigned int *val)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = _parse_integer_ull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned int)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_u);
+
+int _parse_integer_i(const char *s, unsigned int base, int *val)
+{
+       long long tmp;
+       int rv;
+
+       rv = _parse_integer_ll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (int)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_i);
+
+int _parse_integer_us(const char *s, unsigned int base, unsigned short *val)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = _parse_integer_ull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned short)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_us);
+
+int _parse_integer_s(const char *s, unsigned int base, short *val)
+{
+       long long tmp;
+       int rv;
+
+       rv = _parse_integer_ll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (short)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_s);
+
+int _parse_integer_uc(const char *s, unsigned int base, unsigned char *val)
+{
+       unsigned long long tmp;
+       int rv;
+
+       rv = _parse_integer_ull(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (unsigned char)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_uc);
+
+int _parse_integer_sc(const char *s, unsigned int base, signed char *val)
+{
+       long long tmp;
+       int rv;
+
+       rv = _parse_integer_ll(s, base, &tmp);
+       if (rv < 0)
+               return rv;
+       if (tmp != (signed char)tmp)
+               return -ERANGE;
+       *val = tmp;
+       return rv;
+}
+EXPORT_SYMBOL(_parse_integer_sc);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to