On Fri, 10 Aug 2012 18:24:10 -0500 Michael Roth <mdr...@linux.vnet.ibm.com> wrote:
> Currently, when parsing a stream of tokens we make a copy of the token > list at the beginning of each level of recursion so that we do not > modify the original list in cases where we need to fall back to an > earlier state. > > In the worst case, we will only read 1 or 2 tokens off the list before > recursing again, which means an upper bound of roughly N^2 token allocations. > > For a "reasonably" sized QMP request (in this a QMP representation of > cirrus_vga's device state, generated via QIDL, being passed in via > qom-set), this caused my 16GB's of memory to be exhausted before any > noticeable progress was made by the parser. The command is here for > reference, and can be issued against upstream QMP to reproduce (failure > occurs before any qmp command routing/execution): > > http://pastebin.com/mJrZ3Ctg > > This patch works around the issue by using single copy of the token list > in the form of an indexable array so that we can save/restore state by > manipulating indices. With this patch applied the above QMP/JSON request > can be parsed in under a second. Approach looks sane to me, a few review comments below. Anthony, could you provide your reviewed-by please? > > Tested with valgrind, make check, and QMP. > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> > --- > json-parser.c | 234 > +++++++++++++++++++++++++++++++++++---------------------- > 1 file changed, 146 insertions(+), 88 deletions(-) > > diff --git a/json-parser.c b/json-parser.c > index 849e215..b4e6a60 100644 > --- a/json-parser.c > +++ b/json-parser.c > @@ -27,6 +27,11 @@ > typedef struct JSONParserContext > { > Error *err; > + struct { > + QObject **buf; > + size_t pos; > + size_t count; > + } tokens; > } JSONParserContext; > > #define BUG_ON(cond) assert(!(cond)) > @@ -40,7 +45,7 @@ typedef struct JSONParserContext > * 4) deal with premature EOI > */ > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list > *ap); > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); > > /** > * Token manipulators > @@ -270,27 +275,115 @@ out: > return NULL; > } > > +static QObject *parser_context_pop_token(JSONParserContext *ctxt) > +{ > + QObject *token; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > + ctxt->tokens.pos++; > + return token; > +} > + > +/* Note: parser_context_{peek|pop}_token do not increment the > + * token object's refcount. In both cases the references will continue > + * to be * tracked and cleanup in parser_context_free() > + */ > +static QObject *parser_context_peek_token(JSONParserContext *ctxt) > +{ > + QObject *token; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > + return token; > +} > + > +static JSONParserContext parser_context_save(JSONParserContext *ctxt) > +{ > + JSONParserContext saved_ctxt; > + saved_ctxt.tokens.pos = ctxt->tokens.pos; > + saved_ctxt.tokens.count = ctxt->tokens.count; > + saved_ctxt.tokens.buf = ctxt->tokens.buf; Shouldn't saved_ctxt.err be initialized to NULL? > + return saved_ctxt; I was going to say that saved_ctxt had to be static, but then I realized this will end up copying the struct to the caller. > +} > + > +static void parser_context_restore(JSONParserContext *ctxt, > + JSONParserContext saved_ctxt) > +{ > + ctxt->tokens.pos = saved_ctxt.tokens.pos; > + ctxt->tokens.count = saved_ctxt.tokens.count; > + ctxt->tokens.buf = saved_ctxt.tokens.buf; > +} > + > +static void tokens_count_from_iter(QObject *obj, void *opaque) > +{ > + size_t *count = opaque; > + (*count)++; > +} > + > +static void tokens_append_from_iter(QObject *obj, void *opaque) > +{ > + JSONParserContext *ctxt = opaque; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + ctxt->tokens.buf[ctxt->tokens.pos++] = obj; > + qobject_incref(obj); > +} > + > +static JSONParserContext *parser_context_new(QList *tokens) > +{ > + JSONParserContext *ctxt; > + size_t count = 0; > + > + if (!tokens) { > + return NULL; > + } > + > + qlist_iter(tokens, tokens_count_from_iter, &count); > + if (count == 0) { > + return NULL; > + } Please, add qlist_size() instead. > + > + ctxt = g_malloc0(sizeof(JSONParserContext)); > + ctxt->tokens.pos = 0; > + ctxt->tokens.count = count; > + ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); > + qlist_iter(tokens, tokens_append_from_iter, ctxt); > + ctxt->tokens.pos = 0; > + > + return ctxt; > +} > + > +static void parser_context_free(JSONParserContext *ctxt) > +{ > + int i; > + if (ctxt) { > + for (i = 0; i < ctxt->tokens.count; i++) { > + qobject_decref(ctxt->tokens.buf[i]); > + } > + g_free(ctxt->tokens.buf); > + g_free(ctxt); Isn't this leaking ctxt->err? > + } > +} > + > /** > * Parsing rules > */ > -static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, > va_list *ap) > +static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) > { > QObject *key = NULL, *token = NULL, *value, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > > - key = parse_value(ctxt, &working, ap); > + key = parse_value(ctxt, ap); > if (!key || qobject_type(key) != QTYPE_QSTRING) { > parse_error(ctxt, peek, "key is not a string in object"); > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -301,7 +394,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict > *dict, QList **tokens, va_l > goto out; > } > > - value = parse_value(ctxt, &working, ap); > + value = parse_value(ctxt, ap); > if (value == NULL) { > parse_error(ctxt, token, "Missing value in dict"); > goto out; > @@ -309,28 +402,24 @@ static int parse_pair(JSONParserContext *ctxt, QDict > *dict, QList **tokens, va_l > > qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); > > - qobject_decref(token); > qobject_decref(key); > - QDECREF(*tokens); > - *tokens = working; > > return 0; > > out: > - qobject_decref(token); > + parser_context_restore(ctxt, saved_ctxt); > qobject_decref(key); > - QDECREF(working); > > return -1; > } > > -static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, > va_list *ap) > +static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) > { > QDict *dict = NULL; > QObject *token, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -338,23 +427,22 @@ static QObject *parse_object(JSONParserContext *ctxt, > QList **tokens, va_list *a > if (!token_is_operator(token, '{')) { > goto out; > } > - qobject_decref(token); > token = NULL; > > dict = qdict_new(); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > > if (!token_is_operator(peek, '}')) { > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > + if (parse_pair(ctxt, dict, ap) == -1) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -365,59 +453,52 @@ static QObject *parse_object(JSONParserContext *ctxt, > QList **tokens, va_list *a > parse_error(ctxt, token, "expected separator in dict"); > goto out; > } > - qobject_decref(token); > token = NULL; > > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > + if (parse_pair(ctxt, dict, ap) == -1) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > } > - qobject_decref(token); > token = NULL; > } else { > - token = qlist_pop(working); > - qobject_decref(token); > + token = parser_context_pop_token(ctxt); > token = NULL; > } > > - QDECREF(*tokens); > - *tokens = working; > - > return QOBJECT(dict); > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > QDECREF(dict); > return NULL; > } > > -static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list > *ap) > +static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) > { > QList *list = NULL; > QObject *token, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > > if (!token_is_operator(token, '[')) { > + token = NULL; > goto out; > } > - qobject_decref(token); > token = NULL; > > list = qlist_new(); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -426,7 +507,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > if (!token_is_operator(peek, ']')) { > QObject *obj; > > - obj = parse_value(ctxt, &working, ap); > + obj = parse_value(ctxt, ap); > if (obj == NULL) { > parse_error(ctxt, token, "expecting value"); > goto out; > @@ -434,7 +515,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > > qlist_append_obj(list, obj); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -446,10 +527,9 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > goto out; > } > > - qobject_decref(token); > token = NULL; > > - obj = parse_value(ctxt, &working, ap); > + obj = parse_value(ctxt, ap); > if (obj == NULL) { > parse_error(ctxt, token, "expecting value"); > goto out; > @@ -457,39 +537,33 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > > qlist_append_obj(list, obj); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > } > > - qobject_decref(token); > token = NULL; > } else { > - token = qlist_pop(working); > - qobject_decref(token); > + token = parser_context_pop_token(ctxt); > token = NULL; > } > > - QDECREF(*tokens); > - *tokens = working; > - > return QOBJECT(list); > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > QDECREF(list); > return NULL; > } > > -static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) > +static QObject *parse_keyword(JSONParserContext *ctxt) > { > QObject *token, *ret; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -507,29 +581,24 @@ static QObject *parse_keyword(JSONParserContext *ctxt, > QList **tokens) > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return ret; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, > va_list *ap) > +static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) > { > QObject *token = NULL, *obj; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > if (ap == NULL) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -553,25 +622,20 @@ static QObject *parse_escape(JSONParserContext *ctxt, > QList **tokens, va_list *a > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return obj; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) > +static QObject *parse_literal(JSONParserContext *ctxt) > { > QObject *token, *obj; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -591,35 +655,30 @@ static QObject *parse_literal(JSONParserContext *ctxt, > QList **tokens) > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return obj; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list > *ap) > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) > { > QObject *obj; > > - obj = parse_object(ctxt, tokens, ap); > + obj = parse_object(ctxt, ap); > if (obj == NULL) { > - obj = parse_array(ctxt, tokens, ap); > + obj = parse_array(ctxt, ap); > } > if (obj == NULL) { > - obj = parse_escape(ctxt, tokens, ap); > + obj = parse_escape(ctxt, ap); > } > if (obj == NULL) { > - obj = parse_keyword(ctxt, tokens); > + obj = parse_keyword(ctxt); > } > if (obj == NULL) { > - obj = parse_literal(ctxt, tokens); > + obj = parse_literal(ctxt); > } > > return obj; > @@ -632,19 +691,18 @@ QObject *json_parser_parse(QList *tokens, va_list *ap) > > QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) > { > - JSONParserContext ctxt = {}; > - QList *working; > + JSONParserContext *ctxt = parser_context_new(tokens); > QObject *result; > > - if (!tokens) { > + if (!ctxt) { > return NULL; > } > - working = qlist_copy(tokens); > - result = parse_value(&ctxt, &working, ap); > > - QDECREF(working); > + result = parse_value(ctxt, ap); > + > + error_propagate(errp, ctxt->err); > > - error_propagate(errp, ctxt.err); > + parser_context_free(ctxt); > > return result; > }