Remove custom number parser and use C standard library instead. In order to keep compatibility with earlier versions of the parser, we have to take into account a couple of quirks:
- We did not consider "negative" numbers to be valid for anything other than base-10 numbers, whereas C standard library does. Adjust the tests to match the new behavior. - We did not consider numbers such as "+4" to be valid, whereas C standard library does. Adjust the tests to match the new behavior. - C standard library's strtoull does not do range checks on negative numbers, so we have to parse knowingly-negative numbers as signed. - Some C standard library versions do not support binary numbers, so we keep around the relevant parts of the custom parser in place to support them. However, since those libc versions that do support them also support them being negative, allow negative binary in our parser as well. Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com> --- Notes: v9 -> v10: - Fixed commit message not reflecting changes for v9 - Reworked enum range check to avoid compile issues on some platforms v8 -> v9: - glibc 2.38 supports binary formats (0bxxxx and -0bxxxx) under certain conditions, so in order to ensure the same functionality on all glic versions, add support for positive and negative binary formats, and adjust tests accordingly v7 -> v8: - Added the commented-out out-of-bounds check back - Replaced debug print messages to ensure they don't attempt to index the num_help[] array (should fix compile errors) v5 -> v6: - Allowed more negative numbers (such as negative octals or hex) - Updated unit tests to check new cases - Small refactoring of code to reduce amount of noise - More verbose debug output v4 -> v5: - Added this commit app/test/test_cmdline_num.c | 34 ++- lib/cmdline/cmdline_parse_num.c | 395 +++++++++++++++++--------------- 2 files changed, 238 insertions(+), 191 deletions(-) diff --git a/app/test/test_cmdline_num.c b/app/test/test_cmdline_num.c index 9276de59bd..100c564188 100644 --- a/app/test/test_cmdline_num.c +++ b/app/test/test_cmdline_num.c @@ -139,6 +139,9 @@ const struct num_signed_str num_valid_negative_strs[] = { {"-2147483648", INT32_MIN }, {"-2147483649", INT32_MIN - 1LL }, {"-9223372036854775808", INT64_MIN }, + {"-0x8000000000000000", INT64_MIN }, + {"-01000000000000000000000", INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000", INT64_MIN }, }; const struct num_unsigned_str num_garbage_positive_strs[] = { @@ -175,12 +178,40 @@ const struct num_unsigned_str num_garbage_positive_strs[] = { const struct num_signed_str num_garbage_negative_strs[] = { /* valid strings with garbage on the end, should still be valid */ + /* negative numbers */ {"-9223372036854775808\0garbage", INT64_MIN }, {"-9223372036854775808\rgarbage", INT64_MIN }, {"-9223372036854775808\tgarbage", INT64_MIN }, {"-9223372036854775808\ngarbage", INT64_MIN }, {"-9223372036854775808#garbage", INT64_MIN }, {"-9223372036854775808 garbage", INT64_MIN }, + /* negative hex */ + {"-0x8000000000000000\0garbage", INT64_MIN }, + {"-0x8000000000000000\rgarbage", INT64_MIN }, + {"-0x8000000000000000\tgarbage", INT64_MIN }, + {"-0x8000000000000000\ngarbage", INT64_MIN }, + {"-0x8000000000000000#garbage", INT64_MIN }, + {"-0x8000000000000000 garbage", INT64_MIN }, + /* negative binary */ + {"-0b1000000000000000000000000000000000000000000000000000000000000000\0garbage", + INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000\rgarbage", + INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000\tgarbage", + INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000\ngarbage", + INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000#garbage", + INT64_MIN }, + {"-0b1000000000000000000000000000000000000000000000000000000000000000 garbage", + INT64_MIN }, + /* negative octal */ + {"-01000000000000000000000\0garbage", INT64_MIN }, + {"-01000000000000000000000\rgarbage", INT64_MIN }, + {"-01000000000000000000000\tgarbage", INT64_MIN }, + {"-01000000000000000000000\ngarbage", INT64_MIN }, + {"-01000000000000000000000#garbage", INT64_MIN }, + {"-01000000000000000000000 garbage", INT64_MIN }, }; const char * num_invalid_strs[] = { @@ -197,15 +228,12 @@ const char * num_invalid_strs[] = { "0b01110101017001", /* false negative numbers */ "-12345F623", - "-0x1234580A", - "-0b0111010101", /* too long (128+ chars) */ ("0b1111000011110000111100001111000011110000111100001111000011110000" "1111000011110000111100001111000011110000111100001111000011110000"), "1E3", "0A", "-B", - "+4", "1.23G", "", " ", diff --git a/lib/cmdline/cmdline_parse_num.c b/lib/cmdline/cmdline_parse_num.c index f21796bedb..88435d069e 100644 --- a/lib/cmdline/cmdline_parse_num.c +++ b/lib/cmdline/cmdline_parse_num.c @@ -4,6 +4,7 @@ * All rights reserved. */ +#include <errno.h> #include <stdio.h> #include <stdint.h> #include <inttypes.h> @@ -29,23 +30,6 @@ struct cmdline_token_ops cmdline_token_num_ops = { .get_help = cmdline_get_help_num, }; -enum num_parse_state_t { - START, - DEC_NEG, - BIN, - HEX, - - ERROR, - - FIRST_OK, /* not used */ - ZERO_OK, - HEX_OK, - OCTAL_OK, - BIN_OK, - DEC_NEG_OK, - DEC_POS_OK, -}; - /* Keep it sync with enum in .h */ static const char * num_help[] = { "UINT8", "UINT16", "UINT32", "UINT64", @@ -53,13 +37,13 @@ static const char * num_help[] = { }; static inline int -add_to_res(unsigned int c, uint64_t *res, unsigned int base) +add_to_bin(unsigned int c, uint64_t *res) { /* overflow */ - if ((UINT64_MAX - c) / base < *res) + if ((UINT64_MAX - c) / 2 < *res) return -1; - *res = (uint64_t) (*res * base + c); + *res = (uint64_t) (*res * 2 + c); return 0; } @@ -88,138 +72,142 @@ check_res_size(struct cmdline_token_num_data *nd, unsigned ressize) return -1; break; default: + debug_printf("Wrong number type: %d\n", nd->type); return -1; } return 0; } -/* parse an int */ -RTE_EXPORT_SYMBOL(cmdline_parse_num) -int -cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, - unsigned ressize) +static int +validate_type(enum cmdline_numtype type) { - struct cmdline_token_num_data nd; - enum num_parse_state_t st = START; + if (type < RTE_UINT8 || type > RTE_INT64) + return -1; + /* ensure no buffer overrun can occur */ + if ((uint64_t) type >= RTE_DIM(num_help)) + return -1; + return 0; +} + +static int +check_parsed_num(enum cmdline_numtype type, int neg, uint64_t uintres) +{ + int lo_ok, hi_ok; + + switch (type) { + case RTE_UINT8: + lo_ok = !neg; + hi_ok = uintres <= UINT8_MAX; + break; + case RTE_UINT16: + lo_ok = !neg; + hi_ok = uintres <= UINT16_MAX; + break; + case RTE_UINT32: + lo_ok = !neg; + hi_ok = uintres <= UINT32_MAX; + break; + case RTE_UINT64: + lo_ok = !neg; + hi_ok = 1; /* can't be out of range if parsed successfully */ + break; + case RTE_INT8: + lo_ok = !neg || (int64_t)uintres >= INT8_MIN; + hi_ok = neg || uintres <= INT8_MAX; + break; + case RTE_INT16: + lo_ok = !neg || (int64_t)uintres >= INT16_MIN; + hi_ok = neg || uintres <= INT16_MAX; + break; + case RTE_INT32: + lo_ok = !neg || (int64_t)uintres >= INT32_MIN; + hi_ok = neg || uintres <= INT32_MAX; + break; + case RTE_INT64: + lo_ok = 1; /* can't be out of range if parsed successfully */ + hi_ok = neg || uintres <= INT64_MAX; + break; + default: + debug_printf("Wrong number type\n"); + return -1; + } + /* check ranges */ + if (!lo_ok || !hi_ok) + return -1; + return 0; +} + +static int +parse_num(const char *srcbuf, uint64_t *resptr) +{ + uint64_t uintres; + char *end; + int neg = *srcbuf == '-'; + + /* + * strtoull does not do range checks on negative numbers, so we need to + * use strtoll if we know the value we're parsing looks like a negative + * one. we use base 0 for both, 0 means autodetect base. + */ + errno = 0; + if (neg) + uintres = (uint64_t)strtoll(srcbuf, &end, 0); + else + uintres = strtoull(srcbuf, &end, 0); + + if (end == srcbuf || !cmdline_isendoftoken(*end) || errno == ERANGE) + return -1; + + *resptr = uintres; + return end - srcbuf; +} + +static int +parse_bin(const char *srcbuf, uint64_t *res) +{ + uint64_t uintres = 0; + enum { + ERROR, + START, + BIN, + NEG, + ZERO_OK, + BIN_OK, + } st = START; const char * buf; char c; - uint64_t res1 = 0; - - if (!tk) - return -1; - - if (!srcbuf || !*srcbuf) - return -1; + int neg = 0; buf = srcbuf; c = *buf; - - memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd)); - - /* check that we have enough room in res */ - if (res) { - if (check_res_size(&nd, ressize) < 0) - return -1; - } - while (st != ERROR && c && !cmdline_isendoftoken(c)) { debug_printf("%c %x -> ", c, c); switch (st) { case START: - if (c == '-') { - st = DEC_NEG; - } - else if (c == '0') { + if (c == '0') { st = ZERO_OK; + } else if (c == '-') { + neg = 1; + st = NEG; + } + else { + st = ERROR; } - else if (c >= '1' && c <= '9') { - if (add_to_res(c - '0', &res1, 10) < 0) - st = ERROR; - else - st = DEC_POS_OK; + break; + + case NEG: + if (c == '0') { + st = ZERO_OK; } - else { + else { st = ERROR; } break; case ZERO_OK: - if (c == 'x') { - st = HEX; - } - else if (c == 'b') { + if (c == 'b') { st = BIN; } - else if (c >= '0' && c <= '7') { - if (add_to_res(c - '0', &res1, 10) < 0) - st = ERROR; - else - st = OCTAL_OK; - } - else { - st = ERROR; - } - break; - - case DEC_NEG: - if (c >= '0' && c <= '9') { - if (add_to_res(c - '0', &res1, 10) < 0) - st = ERROR; - else - st = DEC_NEG_OK; - } - else { - st = ERROR; - } - break; - - case DEC_NEG_OK: - if (c >= '0' && c <= '9') { - if (add_to_res(c - '0', &res1, 10) < 0) - st = ERROR; - } - else { - st = ERROR; - } - break; - - case DEC_POS_OK: - if (c >= '0' && c <= '9') { - if (add_to_res(c - '0', &res1, 10) < 0) - st = ERROR; - } - else { - st = ERROR; - } - break; - - case HEX: - st = HEX_OK; - /* fall-through */ - case HEX_OK: - if (c >= '0' && c <= '9') { - if (add_to_res(c - '0', &res1, 16) < 0) - st = ERROR; - } - else if (c >= 'a' && c <= 'f') { - if (add_to_res(c - 'a' + 10, &res1, 16) < 0) - st = ERROR; - } - else if (c >= 'A' && c <= 'F') { - if (add_to_res(c - 'A' + 10, &res1, 16) < 0) - st = ERROR; - } - else { - st = ERROR; - } - break; - - - case OCTAL_OK: - if (c >= '0' && c <= '7') { - if (add_to_res(c - '0', &res1, 8) < 0) - st = ERROR; - } else { st = ERROR; } @@ -230,7 +218,7 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, /* fall-through */ case BIN_OK: if (c >= '0' && c <= '1') { - if (add_to_res(c - '0', &res1, 2) < 0) + if (add_to_bin(c - '0', &uintres) < 0) st = ERROR; } else { @@ -239,10 +227,10 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, break; default: debug_printf("not impl "); - + st = ERROR; } - debug_printf("(%"PRIu64")\n", res1); + debug_printf("(%"PRIu64")\n", uintres); buf ++; c = *buf; @@ -252,68 +240,100 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, return -1; } - switch (st) { - case ZERO_OK: - case DEC_POS_OK: - case HEX_OK: - case OCTAL_OK: - case BIN_OK: - if (nd.type == RTE_INT8 && res1 <= INT8_MAX) { - if (res) *(int8_t *)res = (int8_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_INT16 && res1 <= INT16_MAX) { - if (res) *(int16_t *)res = (int16_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_INT32 && res1 <= INT32_MAX) { - if (res) *(int32_t *)res = (int32_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_INT64 && res1 <= INT64_MAX) { - if (res) *(int64_t *)res = (int64_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_UINT8 && res1 <= UINT8_MAX) { - if (res) *(uint8_t *)res = (uint8_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_UINT16 && res1 <= UINT16_MAX) { - if (res) *(uint16_t *)res = (uint16_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_UINT32 && res1 <= UINT32_MAX) { - if (res) *(uint32_t *)res = (uint32_t) res1; - return buf-srcbuf; - } else if (nd.type == RTE_UINT64) { - if (res) *(uint64_t *)res = res1; - return buf-srcbuf; - } else { - return -1; - } - break; + if (st != BIN_OK) + return -1; + + /* was it negative? */ + if (neg) + uintres = -uintres; + + *res = uintres; + return buf - srcbuf; +} - case DEC_NEG_OK: - if (nd.type == RTE_INT8 && - res1 <= INT8_MAX + 1) { - if (res) *(int8_t *)res = (int8_t) (-res1); - return buf-srcbuf; - } else if (nd.type == RTE_INT16 && - res1 <= (uint16_t)INT16_MAX + 1) { - if (res) *(int16_t *)res = (int16_t) (-res1); - return buf-srcbuf; - } else if (nd.type == RTE_INT32 && - res1 <= (uint32_t)INT32_MAX + 1) { - if (res) *(int32_t *)res = (int32_t) (-res1); - return buf-srcbuf; - } else if (nd.type == RTE_INT64 && - res1 <= (uint64_t)INT64_MAX + 1) { - if (res) *(int64_t *)res = (int64_t) (-res1); - return buf-srcbuf; - } else { - return -1; - } +static int +write_num(enum cmdline_numtype type, void *res, uint64_t uintres) +{ + switch (type) { + case RTE_UINT8: + *(uint8_t *)res = (uint8_t)uintres; + break; + case RTE_UINT16: + *(uint16_t *)res = (uint16_t)uintres; + break; + case RTE_UINT32: + *(uint32_t *)res = (uint32_t)uintres; + break; + case RTE_UINT64: + *(uint64_t *)res = uintres; + break; + case RTE_INT8: + *(int8_t *)res = (int8_t)uintres; + break; + case RTE_INT16: + *(int16_t *)res = (int16_t)uintres; + break; + case RTE_INT32: + *(int32_t *)res = (int32_t)uintres; + break; + case RTE_INT64: + *(int64_t *)res = (int64_t)uintres; break; default: - debug_printf("error\n"); + debug_printf("Wrong number type\n"); return -1; } + return 0; } +/* parse an int */ +RTE_EXPORT_SYMBOL(cmdline_parse_num) +int +cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, + unsigned ressize) +{ + struct cmdline_token_num_data nd; + + if (!tk) + return -1; + + if (!srcbuf || !*srcbuf) + return -1; + + memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd)); + + if (validate_type(nd.type) < 0) + return -1; + + /* check that we have enough room in res */ + if (res && check_res_size(&nd, ressize) < 0) + return -1; + + if (nd.type >= RTE_UINT8 && nd.type <= RTE_INT64) { + int ret, neg = *srcbuf == '-'; + uint64_t uintres; + + /* try parsing as number */ + ret = parse_num(srcbuf, &uintres); + + if (ret < 0) { + /* parse failed, try parsing as binary */ + ret = parse_bin(srcbuf, &uintres); + if (ret < 0) + return -1; + } + /* check if we're within valid range */ + if (check_parsed_num(nd.type, neg, uintres) < 0) + return -1; + + /* parsing succeeded, write the value if necessary */ + if (res && write_num(nd.type, res, uintres) < 0) + return -1; + + return ret; + } + return -1; +} /* parse an int */ RTE_EXPORT_SYMBOL(cmdline_get_help_num) @@ -328,9 +348,8 @@ cmdline_get_help_num(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned int s memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd)); - /* should not happen.... don't so this test */ - /* if (nd.type >= (sizeof(num_help)/sizeof(const char *))) */ - /* return -1; */ + if (validate_type(nd.type) < 0) + return -1; ret = strlcpy(dstbuf, num_help[nd.type], size); if (ret < 0) -- 2.47.1