On Mon, Aug 13, 2012 at 03:01:56PM -0300, Luiz Capitulino wrote: > On Fri, 10 Aug 2012 18:24:10 -0500 > Michael Roth <mdr...@linux.vnet.ibm.com> wrote: > > > Currently, when parsing a stream of tokens we make a copy of the token > > list at the beginning of each level of recursion so that we do not > > modify the original list in cases where we need to fall back to an > > earlier state. > > > > In the worst case, we will only read 1 or 2 tokens off the list before > > recursing again, which means an upper bound of roughly N^2 token > > allocations. > > > > For a "reasonably" sized QMP request (in this a QMP representation of > > cirrus_vga's device state, generated via QIDL, being passed in via > > qom-set), this caused my 16GB's of memory to be exhausted before any > > noticeable progress was made by the parser. The command is here for > > reference, and can be issued against upstream QMP to reproduce (failure > > occurs before any qmp command routing/execution): > > > > http://pastebin.com/mJrZ3Ctg > > > > This patch works around the issue by using single copy of the token list > > in the form of an indexable array so that we can save/restore state by > > manipulating indices. With this patch applied the above QMP/JSON request > > can be parsed in under a second. > > Approach looks sane to me, a few review comments below. > > Anthony, could you provide your reviewed-by please? > > > > > Tested with valgrind, make check, and QMP. > > > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> > > --- > > json-parser.c | 234 > > +++++++++++++++++++++++++++++++++++---------------------- > > 1 file changed, 146 insertions(+), 88 deletions(-) > > > > diff --git a/json-parser.c b/json-parser.c > > index 849e215..b4e6a60 100644 > > --- a/json-parser.c > > +++ b/json-parser.c > > @@ -27,6 +27,11 @@ > > typedef struct JSONParserContext > > { > > Error *err; > > + struct { > > + QObject **buf; > > + size_t pos; > > + size_t count; > > + } tokens; > > } JSONParserContext; > > > > #define BUG_ON(cond) assert(!(cond)) > > @@ -40,7 +45,7 @@ typedef struct JSONParserContext > > * 4) deal with premature EOI > > */ > > > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, > > va_list *ap); > > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); > > > > /** > > * Token manipulators > > @@ -270,27 +275,115 @@ out: > > return NULL; > > } > > > > +static QObject *parser_context_pop_token(JSONParserContext *ctxt) > > +{ > > + QObject *token; > > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > > + ctxt->tokens.pos++; > > + return token; > > +} > > + > > +/* Note: parser_context_{peek|pop}_token do not increment the > > + * token object's refcount. In both cases the references will continue > > + * to be * tracked and cleanup in parser_context_free() > > + */ > > +static QObject *parser_context_peek_token(JSONParserContext *ctxt) > > +{ > > + QObject *token; > > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > > + return token; > > +} > > + > > +static JSONParserContext parser_context_save(JSONParserContext *ctxt) > > +{ > > + JSONParserContext saved_ctxt; > > + saved_ctxt.tokens.pos = ctxt->tokens.pos; > > + saved_ctxt.tokens.count = ctxt->tokens.count; > > + saved_ctxt.tokens.buf = ctxt->tokens.buf; > > Shouldn't saved_ctxt.err be initialized to NULL? >
Yes. > > + return saved_ctxt; > > I was going to say that saved_ctxt had to be static, but then I realized > this will end up copying the struct to the caller. > Yah, every caller needs to manage it's own context, so I went with passing by copy since allocating/deallocating these "half-contexts" (we only care about token state with these) seems unecessary. seemed > > +} > > + > > +static void parser_context_restore(JSONParserContext *ctxt, > > + JSONParserContext saved_ctxt) > > +{ > > + ctxt->tokens.pos = saved_ctxt.tokens.pos; > > + ctxt->tokens.count = saved_ctxt.tokens.count; > > + ctxt->tokens.buf = saved_ctxt.tokens.buf; > > +} > > + > > +static void tokens_count_from_iter(QObject *obj, void *opaque) > > +{ > > + size_t *count = opaque; > > + (*count)++; > > +} > > + > > +static void tokens_append_from_iter(QObject *obj, void *opaque) > > +{ > > + JSONParserContext *ctxt = opaque; > > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > > + ctxt->tokens.buf[ctxt->tokens.pos++] = obj; > > + qobject_incref(obj); > > +} > > + > > +static JSONParserContext *parser_context_new(QList *tokens) > > +{ > > + JSONParserContext *ctxt; > > + size_t count = 0; > > + > > + if (!tokens) { > > + return NULL; > > + } > > + > > + qlist_iter(tokens, tokens_count_from_iter, &count); > > + if (count == 0) { > > + return NULL; > > + } > > Please, add qlist_size() instead. > Gladly :) I spent a good amount for a function that did this, not sure how I missed it. > > + > > + ctxt = g_malloc0(sizeof(JSONParserContext)); > > + ctxt->tokens.pos = 0; > > + ctxt->tokens.count = count; > > + ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); > > + qlist_iter(tokens, tokens_append_from_iter, ctxt); > > + ctxt->tokens.pos = 0; > > + > > + return ctxt; > > +} > > + > > +static void parser_context_free(JSONParserContext *ctxt) > > +{ > > + int i; > > + if (ctxt) { > > + for (i = 0; i < ctxt->tokens.count; i++) { > > + qobject_decref(ctxt->tokens.buf[i]); > > + } > > + g_free(ctxt->tokens.buf); > > + g_free(ctxt); > > Isn't this leaking ctxt->err? > Indeed. Looks like we were leaking this previously as well. I'll get it in V2. > > + } > > +} > > + > > /** > > * Parsing rules > > */ > > -static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList > > **tokens, va_list *ap) > > +static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) > > { > > QObject *key = NULL, *token = NULL, *value, *peek; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > - peek = qlist_peek(working); > > + peek = parser_context_peek_token(ctxt); > > if (peek == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > } > > > > - key = parse_value(ctxt, &working, ap); > > + key = parse_value(ctxt, ap); > > if (!key || qobject_type(key) != QTYPE_QSTRING) { > > parse_error(ctxt, peek, "key is not a string in object"); > > goto out; > > } > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > @@ -301,7 +394,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict > > *dict, QList **tokens, va_l > > goto out; > > } > > > > - value = parse_value(ctxt, &working, ap); > > + value = parse_value(ctxt, ap); > > if (value == NULL) { > > parse_error(ctxt, token, "Missing value in dict"); > > goto out; > > @@ -309,28 +402,24 @@ static int parse_pair(JSONParserContext *ctxt, QDict > > *dict, QList **tokens, va_l > > > > qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); > > > > - qobject_decref(token); > > qobject_decref(key); > > - QDECREF(*tokens); > > - *tokens = working; > > > > return 0; > > > > out: > > - qobject_decref(token); > > + parser_context_restore(ctxt, saved_ctxt); > > qobject_decref(key); > > - QDECREF(working); > > > > return -1; > > } > > > > -static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, > > va_list *ap) > > +static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) > > { > > QDict *dict = NULL; > > QObject *token, *peek; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > goto out; > > } > > @@ -338,23 +427,22 @@ static QObject *parse_object(JSONParserContext *ctxt, > > QList **tokens, va_list *a > > if (!token_is_operator(token, '{')) { > > goto out; > > } > > - qobject_decref(token); > > token = NULL; > > > > dict = qdict_new(); > > > > - peek = qlist_peek(working); > > + peek = parser_context_peek_token(ctxt); > > if (peek == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > } > > > > if (!token_is_operator(peek, '}')) { > > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > > + if (parse_pair(ctxt, dict, ap) == -1) { > > goto out; > > } > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > @@ -365,59 +453,52 @@ static QObject *parse_object(JSONParserContext *ctxt, > > QList **tokens, va_list *a > > parse_error(ctxt, token, "expected separator in dict"); > > goto out; > > } > > - qobject_decref(token); > > token = NULL; > > > > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > > + if (parse_pair(ctxt, dict, ap) == -1) { > > goto out; > > } > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > } > > } > > - qobject_decref(token); > > token = NULL; > > } else { > > - token = qlist_pop(working); > > - qobject_decref(token); > > + token = parser_context_pop_token(ctxt); > > token = NULL; > > } > > > > - QDECREF(*tokens); > > - *tokens = working; > > - > > return QOBJECT(dict); > > > > out: > > - qobject_decref(token); > > - QDECREF(working); > > + parser_context_restore(ctxt, saved_ctxt); > > QDECREF(dict); > > return NULL; > > } > > > > -static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, > > va_list *ap) > > +static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) > > { > > QList *list = NULL; > > QObject *token, *peek; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > goto out; > > } > > > > if (!token_is_operator(token, '[')) { > > + token = NULL; > > goto out; > > } > > - qobject_decref(token); > > token = NULL; > > > > list = qlist_new(); > > > > - peek = qlist_peek(working); > > + peek = parser_context_peek_token(ctxt); > > if (peek == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > @@ -426,7 +507,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > > QList **tokens, va_list *ap > > if (!token_is_operator(peek, ']')) { > > QObject *obj; > > > > - obj = parse_value(ctxt, &working, ap); > > + obj = parse_value(ctxt, ap); > > if (obj == NULL) { > > parse_error(ctxt, token, "expecting value"); > > goto out; > > @@ -434,7 +515,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > > QList **tokens, va_list *ap > > > > qlist_append_obj(list, obj); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > @@ -446,10 +527,9 @@ static QObject *parse_array(JSONParserContext *ctxt, > > QList **tokens, va_list *ap > > goto out; > > } > > > > - qobject_decref(token); > > token = NULL; > > > > - obj = parse_value(ctxt, &working, ap); > > + obj = parse_value(ctxt, ap); > > if (obj == NULL) { > > parse_error(ctxt, token, "expecting value"); > > goto out; > > @@ -457,39 +537,33 @@ static QObject *parse_array(JSONParserContext *ctxt, > > QList **tokens, va_list *ap > > > > qlist_append_obj(list, obj); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > parse_error(ctxt, NULL, "premature EOI"); > > goto out; > > } > > } > > > > - qobject_decref(token); > > token = NULL; > > } else { > > - token = qlist_pop(working); > > - qobject_decref(token); > > + token = parser_context_pop_token(ctxt); > > token = NULL; > > } > > > > - QDECREF(*tokens); > > - *tokens = working; > > - > > return QOBJECT(list); > > > > out: > > - qobject_decref(token); > > - QDECREF(working); > > + parser_context_restore(ctxt, saved_ctxt); > > QDECREF(list); > > return NULL; > > } > > > > -static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) > > +static QObject *parse_keyword(JSONParserContext *ctxt) > > { > > QObject *token, *ret; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > goto out; > > } > > @@ -507,29 +581,24 @@ static QObject *parse_keyword(JSONParserContext > > *ctxt, QList **tokens) > > goto out; > > } > > > > - qobject_decref(token); > > - QDECREF(*tokens); > > - *tokens = working; > > - > > return ret; > > > > out: > > - qobject_decref(token); > > - QDECREF(working); > > + parser_context_restore(ctxt, saved_ctxt); > > > > return NULL; > > } > > > > -static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, > > va_list *ap) > > +static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) > > { > > QObject *token = NULL, *obj; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > if (ap == NULL) { > > goto out; > > } > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > goto out; > > } > > @@ -553,25 +622,20 @@ static QObject *parse_escape(JSONParserContext *ctxt, > > QList **tokens, va_list *a > > goto out; > > } > > > > - qobject_decref(token); > > - QDECREF(*tokens); > > - *tokens = working; > > - > > return obj; > > > > out: > > - qobject_decref(token); > > - QDECREF(working); > > + parser_context_restore(ctxt, saved_ctxt); > > > > return NULL; > > } > > > > -static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) > > +static QObject *parse_literal(JSONParserContext *ctxt) > > { > > QObject *token, *obj; > > - QList *working = qlist_copy(*tokens); > > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > > > - token = qlist_pop(working); > > + token = parser_context_pop_token(ctxt); > > if (token == NULL) { > > goto out; > > } > > @@ -591,35 +655,30 @@ static QObject *parse_literal(JSONParserContext > > *ctxt, QList **tokens) > > goto out; > > } > > > > - qobject_decref(token); > > - QDECREF(*tokens); > > - *tokens = working; > > - > > return obj; > > > > out: > > - qobject_decref(token); > > - QDECREF(working); > > + parser_context_restore(ctxt, saved_ctxt); > > > > return NULL; > > } > > > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, > > va_list *ap) > > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) > > { > > QObject *obj; > > > > - obj = parse_object(ctxt, tokens, ap); > > + obj = parse_object(ctxt, ap); > > if (obj == NULL) { > > - obj = parse_array(ctxt, tokens, ap); > > + obj = parse_array(ctxt, ap); > > } > > if (obj == NULL) { > > - obj = parse_escape(ctxt, tokens, ap); > > + obj = parse_escape(ctxt, ap); > > } > > if (obj == NULL) { > > - obj = parse_keyword(ctxt, tokens); > > + obj = parse_keyword(ctxt); > > } > > if (obj == NULL) { > > - obj = parse_literal(ctxt, tokens); > > + obj = parse_literal(ctxt); > > } > > > > return obj; > > @@ -632,19 +691,18 @@ QObject *json_parser_parse(QList *tokens, va_list *ap) > > > > QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) > > { > > - JSONParserContext ctxt = {}; > > - QList *working; > > + JSONParserContext *ctxt = parser_context_new(tokens); > > QObject *result; > > > > - if (!tokens) { > > + if (!ctxt) { > > return NULL; > > } > > - working = qlist_copy(tokens); > > - result = parse_value(&ctxt, &working, ap); > > > > - QDECREF(working); > > + result = parse_value(ctxt, ap); > > + > > + error_propagate(errp, ctxt->err); > > > > - error_propagate(errp, ctxt.err); > > + parser_context_free(ctxt); > > > > return result; > > } >