On Thu, Jan 17, 2013 at 7:06 PM, Eduardo Habkost <ehabk...@redhat.com> wrote: > There are lots of duplicate parsing code using strto*() in QEMU, and > most of that code is broken in one way or another. Even the visitors > code have duplicate integer parsing code[1]. This introduces functions > to help parsing unsigned int values: parse_uint() and parse_uint_full(). > > Parsing functions for signed ints and floats will be submitted later. > > parse_uint_full() has all the checks made by opts_type_uint64() at > opts-visitor.c: > > - Check for NULL (returns -EINVAL) > - Check for negative numbers (returns -ERANGE) > - Check for empty string (returns -EINVAL) > - Check for overflow or other errno values set by strtoll() (returns > -errno) > - Check for end of string (reject invalid characters after number) > (returns -EINVAL) > > parse_uint() does everything above except checking for the end of the > string, so callers can continue parsing the remainder of string after > the number. > > Unit tests included.
I think it would be pretty cool if we also used a string fuzzer to check the robustness of string parsers. > > [1] string-input-visitor.c:parse_int() could use the same parsing code > used by opts-visitor.c:opts_type_int(), instead of duplicating that > logic. > > Signed-off-by: Eduardo Habkost <ehabk...@redhat.com> > --- > Changes v2: > - Trivial whitespace change > - Add 'base' parameter to the functions > > Changes v4: > - Return -EINVAL in case a minus sign is found > - Make endptr point to beginning of string in case -EINVAL > is returned (like the strtoull() behavior) > > v3->v4 interdiff: > > diff --git a/tests/test-cutils.c b/tests/test-cutils.c > index f4f6951..f8395f4 100644 > --- a/tests/test-cutils.c > +++ b/tests/test-cutils.c > @@ -61,6 +61,22 @@ static void test_parse_uint_empty(void) > g_assert(endptr == str); > } > > +static void test_parse_uint_invalid(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = " \t xxx"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 0); > + g_assert(endptr == str); > +} > + > + > static void test_parse_uint_trailing(void) > { > unsigned long long i = 999; > @@ -164,9 +180,9 @@ static void test_parse_uint_negative(void) > > r = parse_uint(str, &i, &endptr, 0); > > - g_assert_cmpint(r, ==, -ERANGE); > + g_assert_cmpint(r, ==, -EINVAL); > g_assert_cmpint(i, ==, 0); > - g_assert(endptr == str + 3); > + g_assert(endptr == str); > } > > > @@ -200,6 +216,7 @@ int main(int argc, char **argv) > > g_test_add_func("/cutils/parse_uint/null", test_parse_uint_null); > g_test_add_func("/cutils/parse_uint/empty", test_parse_uint_empty); > + g_test_add_func("/cutils/parse_uint/invalid", > test_parse_uint_invalid); > g_test_add_func("/cutils/parse_uint/trailing", > test_parse_uint_trailing); > g_test_add_func("/cutils/parse_uint/correct", > test_parse_uint_correct); > g_test_add_func("/cutils/parse_uint/octal", test_parse_uint_octal); > diff --git a/util/cutils.c b/util/cutils.c > index 7f09251..ce80a2a 100644 > --- a/util/cutils.c > +++ b/util/cutils.c > @@ -278,7 +278,7 @@ int64_t strtosz(const char *nptr, char **end) > * - Overflow errors or other errno values set by strtoull() will > * return -errno (-ERANGE in case of overflow). > * - Differently from strtoull(), values starting with a minus sign are > - * rejected (returning -ERANGE). > + * rejected (returning -EINVAL). > * > * Sets endptr to point to the first invalid character. Callers may rely > * on *value and *endptr to be always set by the function, even in case > of > @@ -294,6 +294,7 @@ int parse_uint(const char *s, unsigned long long > *value, char **endptr, > int r = 0; > char *endp = (char *)s; > unsigned long long val = 0; > + const char *sp; > > if (!s) { > r = -EINVAL; > @@ -301,11 +302,12 @@ int parse_uint(const char *s, unsigned long long > *value, char **endptr, > } > > /* make sure we reject negative numbers: */ > - while (isspace((unsigned char)*s)) { > - ++s; endp++; > + sp = s; > + while (isspace((unsigned char)*sp)) { > + ++sp; > } > - if (*s == '-') { > - r = -ERANGE; > + if (*sp == '-') { > + r = -EINVAL; > goto out; > } > > --- > include/qemu-common.h | 4 + > tests/Makefile | 3 + > tests/test-cutils.c | 233 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > util/cutils.c | 81 ++++++++++++++++++ > 4 files changed, 321 insertions(+) > create mode 100644 tests/test-cutils.c > > diff --git a/include/qemu-common.h b/include/qemu-common.h > index ca464bb..f134629 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -170,6 +170,10 @@ int qemu_fdatasync(int fd); > int fcntl_setfl(int fd, int flag); > int qemu_parse_fd(const char *param); > > +int parse_uint(const char *s, unsigned long long *value, char **endptr, > + int base); > +int parse_uint_full(const char *s, unsigned long long *value, int base); > + > /* > * strtosz() suffixes used to specify the default treatment of an > * argument passed to strtosz() without an explicit suffix. > diff --git a/tests/Makefile b/tests/Makefile > index d86e95a..e5929cd 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -45,6 +45,8 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c > gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c > check-unit-y += tests/test-thread-pool$(EXESUF) > gcov-files-test-thread-pool-y = thread-pool.c > +check-unit-y += tests/test-cutils$(EXESUF) > +gcov-files-test-cutils-y += util/cutils.c > > check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh > > @@ -86,6 +88,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o > $(block-obj-y) libqemuutil > tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a > libqemustub.a > tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) > libqemuutil.a libqemustub.a > tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a > +tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o > > tests/test-qapi-types.c tests/test-qapi-types.h :\ > $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py > diff --git a/tests/test-cutils.c b/tests/test-cutils.c > new file mode 100644 > index 0000000..f8395f4 > --- /dev/null > +++ b/tests/test-cutils.c > @@ -0,0 +1,233 @@ > +/* > + * cutils.c unit-tests > + * > + * Copyright (C) 2013 Red Hat Inc. > + * > + * Authors: > + * Eduardo Habkost <ehabk...@redhat.com> > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include <glib.h> > +#include <errno.h> > +#include <string.h> > + > +#include "qemu-common.h" > + > + > +static void test_parse_uint_null(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + int r; > + > + r = parse_uint(NULL, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 0); > + g_assert(endptr == NULL); > +} > + > +static void test_parse_uint_empty(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = ""; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 0); > + g_assert(endptr == str); > +} > + > +static void test_parse_uint_invalid(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = " \t xxx"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 0); > + g_assert(endptr == str); > +} > + > + > +static void test_parse_uint_trailing(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = "123xxx"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, 123); > + g_assert(endptr == str + 3); > +} > + > +static void test_parse_uint_correct(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = "123"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, 123); > + g_assert(endptr == str + strlen(str)); > +} > + > +static void test_parse_uint_octal(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = "0123"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, 0123); > + g_assert(endptr == str + strlen(str)); > +} > + > +static void test_parse_uint_decimal(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = "0123"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 10); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, 123); > + g_assert(endptr == str + strlen(str)); > +} > + > + > +static void test_parse_uint_llong_max(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + char *str = g_strdup_printf("%llu", (unsigned long long)LLONG_MAX + 1); > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, (unsigned long long)LLONG_MAX + 1); > + g_assert(endptr == str + strlen(str)); > + > + g_free(str); > +} > + > +static void test_parse_uint_overflow(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = "99999999999999999999999999999999999999"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -ERANGE); > + g_assert_cmpint(i, ==, ULLONG_MAX); > + g_assert(endptr == str + strlen(str)); > +} > + > +static void test_parse_uint_negative(void) > +{ > + unsigned long long i = 999; > + char f = 'X'; > + char *endptr = &f; > + const char *str = " \t -321"; > + int r; > + > + r = parse_uint(str, &i, &endptr, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 0); > + g_assert(endptr == str); > +} > + > + > +static void test_parse_uint_full_trailing(void) > +{ > + unsigned long long i = 999; > + const char *str = "123xxx"; > + int r; > + > + r = parse_uint_full(str, &i, 0); > + > + g_assert_cmpint(r, ==, -EINVAL); > + g_assert_cmpint(i, ==, 123); > +} > + > +static void test_parse_uint_full_correct(void) > +{ > + unsigned long long i = 999; > + const char *str = "123"; > + int r; > + > + r = parse_uint_full(str, &i, 0); > + > + g_assert_cmpint(r, ==, 0); > + g_assert_cmpint(i, ==, 123); > +} > + > +int main(int argc, char **argv) > +{ > + g_test_init(&argc, &argv, NULL); > + > + g_test_add_func("/cutils/parse_uint/null", test_parse_uint_null); > + g_test_add_func("/cutils/parse_uint/empty", test_parse_uint_empty); > + g_test_add_func("/cutils/parse_uint/invalid", test_parse_uint_invalid); > + g_test_add_func("/cutils/parse_uint/trailing", test_parse_uint_trailing); > + g_test_add_func("/cutils/parse_uint/correct", test_parse_uint_correct); > + g_test_add_func("/cutils/parse_uint/octal", test_parse_uint_octal); > + g_test_add_func("/cutils/parse_uint/decimal", test_parse_uint_decimal); > + g_test_add_func("/cutils/parse_uint/llong_max", > test_parse_uint_llong_max); > + g_test_add_func("/cutils/parse_uint/overflow", test_parse_uint_overflow); > + g_test_add_func("/cutils/parse_uint/negative", test_parse_uint_negative); > + g_test_add_func("/cutils/parse_uint_full/trailing", > + test_parse_uint_full_trailing); > + g_test_add_func("/cutils/parse_uint_full/correct", > + test_parse_uint_full_correct); > + > + return g_test_run(); > +} > diff --git a/util/cutils.c b/util/cutils.c > index 80bb1dc..ce80a2a 100644 > --- a/util/cutils.c > +++ b/util/cutils.c > @@ -270,6 +270,87 @@ int64_t strtosz(const char *nptr, char **end) > return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB); > } > > +/* Try to parse an unsigned integer > + * > + * Error checks done by the function: > + * - NULL pointer will return -EINVAL. > + * - Empty strings will return -EINVAL. > + * - Overflow errors or other errno values set by strtoull() will > + * return -errno (-ERANGE in case of overflow). > + * - Differently from strtoull(), values starting with a minus sign are > + * rejected (returning -EINVAL). > + * > + * Sets endptr to point to the first invalid character. Callers may rely > + * on *value and *endptr to be always set by the function, even in case of > + * errors. > + * > + * The 'base' parameter has the same meaning of 'base' on strtoull(). > + * > + * Returns 0 on success, negative errno value on error. > + */ > +int parse_uint(const char *s, unsigned long long *value, char **endptr, > + int base) > +{ > + int r = 0; > + char *endp = (char *)s; > + unsigned long long val = 0; > + const char *sp; > + > + if (!s) { > + r = -EINVAL; > + goto out; > + } > + > + /* make sure we reject negative numbers: */ > + sp = s; > + while (isspace((unsigned char)*sp)) { > + ++sp; > + } > + if (*sp == '-') { > + r = -EINVAL; > + goto out; > + } > + > + errno = 0; > + val = strtoull(s, &endp, base); > + if (errno) { > + r = -errno; > + goto out; > + } > + > + if (endp == s) { > + r = -EINVAL; > + goto out; > + } > + > +out: > + *value = val; > + *endptr = endp; > + return r; > +} > + > +/* Try to parse an unsigned integer, making sure the whole string is parsed > + * > + * Have the same behavior of parse_uint(), but with an additional check > + * for additional data after the parsed number (in that case, the function > + * will return -EINVAL). > + */ > +int parse_uint_full(const char *s, unsigned long long *value, int base) > +{ > + char *endp; > + int r; > + > + r = parse_uint(s, value, &endp, base); > + if (r < 0) { > + return r; > + } > + if (*endp) { > + return -EINVAL; > + } > + > + return 0; > +} > + > int qemu_parse_fd(const char *param) > { > int fd; > -- > 1.7.11.7 > >