There have been times in the past where we have been careless and allowed non-finite values to escape as a 'number' type in QMP output (such as before commit 27ff42e). This is in violation of the JSON specification in RFC 7159, and usually caused by floating-point division by 0 resulting in NaN, although infinity is also a possible culprit. While a future patch will tighten the output generator to avoid a lexical error from a strict JSON parser, we might as well first fix our parser to accept non-finite values as an extension, so that we can always read what we have output in the past ("be liberal in what you accept, strict in what you produce").
Rather than complicate the lexer to add LOTS of states for each letter within 'nan' and 'inf[inity]', I chose to just abuse the 'keyword' token, but had to make it accept upper case. Also, since I want to parse '-inf', I had to tweak IN_NEG_NONZERO_NUMBER; and renamed that in the process (we have always accepted '-0', so the state name was a misnomer). Then the parser then does the real magic of creating a QFloat object if a non-finite "keyword" was recognized. I intentionally did not support NAN(n-char-sequence) forms, even though C99 requires it to work for strtod(), in part because "implementation-specified" n-char-sequence is rather hard to test in a platform-agnostic manner, and in part because we've never actually output it. Improve the testsuite to cover the new extension, including working around the fact that g_assert_cmpfloat() does NOT test equivalence, but merely mathematical equality (0.0 and -0.0 are equal but not equivalent, NaN and NaN are equivalent but not equal), and enhance the existing tests for -0.0 in the process. Checkpatch has a false negative, complaining that there should be spaces around binary '-' in what is really the float literal -32.20e-10. It was over my head to figure out how to silence that one. Signed-off-by: Eric Blake <ebl...@redhat.com> --- qobject/json-lexer.c | 15 ++++++++----- qobject/json-parser.c | 13 +++++++++-- tests/check-qjson.c | 62 ++++++++++++++++++++++++++++++++++++++++++++------- docs/qmp-spec.txt | 4 ++++ 4 files changed, 79 insertions(+), 15 deletions(-) diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index ebd15d8..de16219 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -18,13 +18,14 @@ #define MAX_TOKEN_SIZE (64ULL << 20) /* - * Required by JSON (RFC 7159), plus \' extension in "": + * Required by JSON (RFC 7159), plus \' extension in "", and extension + * of parsing case-insensitive non-finite numbers like "NaN" and "-Inf": * * \"([^\\\"]|(\\\"|\\'|\\\\|\\/|\\b|\\f|\\n|\\r|\\t| * \\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\" * -?(0|[1-9][0-9]*)(.[0-9]+)?([eE][-+]?[0-9]+)? * [{}\[\],:] - * [a-z]+ # covers null, true, false + * -?[a-zA-Z]+ # covers null, true, false, nan, inf[inity] * * Extension of '' strings: * @@ -58,7 +59,7 @@ enum json_lexer_state { IN_MANTISSA, IN_MANTISSA_DIGITS, IN_NONZERO_NUMBER, - IN_NEG_NONZERO_NUMBER, + IN_NEG_NUMBER, IN_KEYWORD, IN_ESCAPE, IN_ESCAPE_L, @@ -206,15 +207,18 @@ static const uint8_t json_lexer[][256] = { ['.'] = IN_MANTISSA, }, - [IN_NEG_NONZERO_NUMBER] = { + [IN_NEG_NUMBER] = { ['0'] = IN_ZERO, ['1' ... '9'] = IN_NONZERO_NUMBER, + ['a' ... 'z'] = IN_KEYWORD, + ['A' ... 'Z'] = IN_KEYWORD, }, /* keywords */ [IN_KEYWORD] = { TERMINAL(JSON_KEYWORD), ['a' ... 'z'] = IN_KEYWORD, + ['A' ... 'Z'] = IN_KEYWORD, }, /* whitespace */ @@ -264,7 +268,7 @@ static const uint8_t json_lexer[][256] = { ['\''] = IN_SQ_STRING, ['0'] = IN_ZERO, ['1' ... '9'] = IN_NONZERO_NUMBER, - ['-'] = IN_NEG_NONZERO_NUMBER, + ['-'] = IN_NEG_NUMBER, ['{'] = JSON_LCURLY, ['}'] = JSON_RCURLY, ['['] = JSON_LSQUARE, @@ -272,6 +276,7 @@ static const uint8_t json_lexer[][256] = { [','] = JSON_COMMA, [':'] = JSON_COLON, ['a' ... 'z'] = IN_KEYWORD, + ['A' ... 'Z'] = IN_KEYWORD, ['%'] = IN_ESCAPE, [' '] = IN_WHITESPACE, ['\t'] = IN_WHITESPACE, diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 67ed727..12519b6 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -450,6 +450,16 @@ static QObject *parse_keyword(JSONParserContext *ctxt) return QOBJECT(qbool_from_bool(false)); } else if (!strcmp(token->str, "null")) { return qnull(); + } else { + double d; + char *p; + + /* The lexer feeds us "NaN" and "-Inf" as keywords */ + errno = 0; + d = strtod(token->str, &p); + if (!errno && p != token->str && !*p) { + return QOBJECT(qfloat_from_double(d)); + } } parse_error(ctxt, token, "invalid keyword '%s'", token->str); return NULL; @@ -519,8 +529,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) } case JSON_FLOAT: /* FIXME dependent on locale; a pervasive issue in QEMU */ - /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN, - * but those might be useful extensions beyond JSON */ + /* NaN and infinity are parsed as extensions under parse_keyword() */ return QOBJECT(qfloat_from_double(strtod(token->str, NULL))); default: abort(); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 0e158f6..95adf64 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -11,6 +11,7 @@ * */ #include "qemu/osdep.h" +#include <math.h> #include "qapi/qmp/qstring.h" #include "qapi/qmp/qint.h" @@ -932,34 +933,78 @@ static void float_number(void) struct { const char *encoded; double decoded; - int skip; + const char *canonical; } test_cases[] = { { "32.43", 32.43 }, { "0.222", 0.222 }, { "-32.12313", -32.12313 }, - { "-32.20e-10", -32.20e-10, .skip = 1 }, + { "-32.20e-10", -32.20e-10, "-0" }, /* FIXME: Our rounding is lousy */ + { "-0.0", -0.0, "-0" }, { }, }; for (i = 0; test_cases[i].encoded; i++) { QObject *obj; QFloat *qfloat; + QString *str; obj = qobject_from_json(test_cases[i].encoded); g_assert(obj != NULL); g_assert(qobject_type(obj) == QTYPE_QFLOAT); qfloat = qobject_to_qfloat(obj); - g_assert(qfloat_get_double(qfloat) == test_cases[i].decoded); + g_assert_cmpfloat(qfloat_get_double(qfloat), ==, + test_cases[i].decoded); + g_assert_cmpint(signbit(qfloat_get_double(qfloat)), ==, + signbit(test_cases[i].decoded)); - if (test_cases[i].skip == 0) { - QString *str; + str = qobject_to_json(obj); + g_assert_cmpstr(qstring_get_str(str), ==, + test_cases[i].canonical ?: test_cases[i].encoded); + QDECREF(str); + QDECREF(qfloat); + } +} - str = qobject_to_json(obj); - g_assert(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0); - QDECREF(str); +static void non_finite_number(void) +{ + int i; + struct { + const char *encoded; + double decoded; + const char *canonical; + } test_cases[] = { + { "nan", NAN }, + { "NaN", NAN, "nan" }, + /* Not all libc implementations can round-trip '-nan' */ + /* We do not support forms like 'NAN(0)' */ + { "inf", INFINITY }, + { "-INFINITY", -INFINITY, "-inf" }, + { }, + }; + + for (i = 0; test_cases[i].encoded; i++) { + QObject *obj; + QFloat *qfloat; + QString *str; + + obj = qobject_from_json(test_cases[i].encoded); + g_assert(obj != NULL); + g_assert(qobject_type(obj) == QTYPE_QFLOAT); + + qfloat = qobject_to_qfloat(obj); + /* g_assert_cmpfloat(NAN, ==, NAN) doesn't do what we want */ + g_assert_cmpint(fpclassify(qfloat_get_double(qfloat)), ==, + fpclassify(test_cases[i].decoded)); + if (!isnan(test_cases[i].decoded)) { + g_assert_cmpfloat(qfloat_get_double(qfloat), ==, + test_cases[i].decoded); } + str = qobject_to_json(obj); + g_assert_cmpstr(qstring_get_str(str), ==, + test_cases[i].canonical ?: test_cases[i].encoded); + QDECREF(str); QDECREF(qfloat); } } @@ -1520,6 +1565,7 @@ int main(int argc, char **argv) g_test_add_func("/literals/number/simple", simple_number); g_test_add_func("/literals/number/float", float_number); + g_test_add_func("/literals/number/non-finite", non_finite_number); g_test_add_func("/literals/number/vararg", vararg_number); g_test_add_func("/literals/keyword", keyword_literal); diff --git a/docs/qmp-spec.txt b/docs/qmp-spec.txt index f8b5356..bfba431 100644 --- a/docs/qmp-spec.txt +++ b/docs/qmp-spec.txt @@ -51,6 +51,10 @@ json-string, and both input forms of strings understand an additional escape sequence of "\'" for a single quote. The server will only use double quoting on output. +As an extension, the server understands case-insensitive forms of +non-finite numbers, such as "NaN", "Inf", and "-infinity" (but not +"NaN(...)"). + 2.1 General Definitions ----------------------- -- 2.5.5