Re: [PATCH 02/10] Add JSON implementation

Eric Gallager Wed, 30 May 2018 10:25:56 -0700

On 5/29/18, David Malcolm <dmalc...@redhat.com> wrote:
> This patch is the JSON patch I posted last year;
> it adds support to gcc for reading and writing JSON,
> based on DOM-like trees of json::value instances.
>
> This is overkill for what's needed by the rest of the
> patch kit (which just needs to be able to write JSON),
> but this code already existed, so I'm using it for now.
>


I think I remember you posting this last year, but I forget where in
the archives it is. Could you post a link to the thread from last year
just for reference? Thanks.

> gcc/ChangeLog:
>       * Makefile.in (OBJS): Add json.o.
>       * json.cc: New file.
>       * json.h: New file.
>       * selftest-run-tests.c (selftest::run_tests): Call json_cc_tests.
>       * selftest.h (selftest::json_cc_tests): New decl.
> ---
>  gcc/Makefile.in          |    1 +
>  gcc/json.cc              | 1914
> ++++++++++++++++++++++++++++++++++++++++++++++
>  gcc/json.h               |  214 ++++++
>  gcc/selftest-run-tests.c |    1 +
>  gcc/selftest.h           |    1 +
>  5 files changed, 2131 insertions(+)
>  create mode 100644 gcc/json.cc
>  create mode 100644 gcc/json.h
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 20bee04..b3c7d5d 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1385,6 +1385,7 @@ OBJS = \
>       ira-color.o \
>       ira-emit.o \
>       ira-lives.o \
> +     json.o \
>       jump.o \
>       langhooks.o \
>       lcm.o \
> diff --git a/gcc/json.cc b/gcc/json.cc
> new file mode 100644
> index 0000000..e0d5a76
> --- /dev/null
> +++ b/gcc/json.cc
> @@ -0,0 +1,1914 @@
> +/* JSON parsing
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "json.h"
> +#include "pretty-print.h"
> +#include "math.h"
> +#include "selftest.h"
> +
> +using namespace json;
> +
> +/* class json::value.  */
> +
> +/* Generate a char * for this json::value tree.
> +   The returned value must be freed by the caller.  */
> +
> +char *
> +value::to_str () const
> +{
> +  pretty_printer pp;
> +  print (&pp);
> +  return xstrdup (pp_formatted_text (&pp));
> +}
> +
> +/* Dump this json::value tree to OUTF.
> +   No formatting is done.  There are no guarantees about the order
> +   in which the key/value pairs of json::objects are printed.  */
> +
> +void
> +value::dump (FILE *outf) const
> +{
> +  pretty_printer pp;
> +  pp_buffer (&pp)->stream = outf;
> +  print (&pp);
> +  pp_flush (&pp);
> +}
> +
> +/* If this json::value is a json::object, return it,
> +   otherwise return NULL.  */
> +
> +const object *
> +value::as_object () const
> +{
> +  if (get_kind () != JSON_OBJECT)
> +    return NULL;
> +  return static_cast <const object *> (this);
> +}
> +
> +/* If this json::value is a json::array, return it,
> +   otherwise return NULL.  */
> +
> +const array *
> +value::as_array () const
> +{
> +  if (get_kind () != JSON_ARRAY)
> +    return NULL;
> +  return static_cast <const array *> (this);
> +}
> +
> +/* If this json::value is a json::number, return it,
> +   otherwise return NULL.  */
> +
> +const number *
> +value::as_number () const
> +{
> +  if (get_kind () != JSON_NUMBER)
> +    return NULL;
> +  return static_cast <const number *> (this);
> +}
> +
> +/* If this json::value is a json::string, return it,
> +   otherwise return NULL.  */
> +
> +const string *
> +value::as_string () const
> +{
> +  if (get_kind () != JSON_STRING)
> +    return NULL;
> +  return static_cast <const string *> (this);
> +}
> +
> +/* Attempt to get the value of a key/value pair from this value
> +   as if THIS value were an object.
> +
> +   If THIS is not a json::object, write an error message to OUT_ERR
> +   (which must be freed by the caller) and return false.
> +
> +   Otherwise write the value ptr (possibly NULL) to OUT_VALUE and
> +   return true.  */
> +
> +bool
> +value::get_optional_value_by_key (const char *name, const value
> *&out_value,
> +                               char *&out_err) const
> +{
> +  const json::object *obj = as_object ();
> +  if (!obj)
> +    {
> +      out_err = xstrdup ("not an object");
> +      return false;
> +    }
> +  out_value = obj->get (name);
> +  return true;
> +}
> +
> +/* Attempt to get a string value of a key/value pair from this value
> +   as if THIS value were an object.
> +
> +   If THIS is a json::object, and KEY is either not present, is a string,
> +   or is the "null" JSON literal, then return true, and write to OUT_VALUE.
> +   If a string, then the ptr is written to OUT_VALUE, otherwise NULL
> +   is written to OUT_VALUE.
> +
> +   If THIS is not a json::object, or KEY is not a string/"null",
> +   return false and write an error message to OUT_ERR
> +   (which must be freed by the caller).  */
> +
> +bool
> +value::get_optional_string_by_key (const char *name, const char
> *&out_value,
> +                                char *&out_err) const
> +{
> +  const json::value *v;
> +  if (!get_optional_value_by_key (name, v, out_err))
> +    return false;
> +  if (v && v->get_kind () != JSON_NULL)
> +    {
> +      const json::string *s = v->as_string ();
> +      if (!s)
> +     {
> +       out_err = xasprintf ("not a string: \"%s\"", name);
> +       return false;
> +     }
> +      out_value = s->get_string ();
> +      return true;
> +    }
> +  else
> +    {
> +      out_value = NULL;
> +      return true;
> +    }
> +}
> +
> +/* Attempt to look up the value of a key/value pair from this value
> +   as if this value were an object.
> +
> +   To succeed, THIS must be a json::object, and it must have a key named
> +   NAME.
> +
> +   On success, return true and write the value to OUT_VALUE.
> +   On failure, return false and write an error message to OUT_ERR
> +   (which must be freed by the caller).  */
> +
> +bool
> +value::get_value_by_key (const char *name, const value *&out_value,
> +                      char *&out_err) const
> +{
> +  const json::object *obj = as_object ();
> +  if (!obj)
> +    {
> +      out_err = xstrdup ("not an object");
> +      return false;
> +    }
> +  const json::value *v = obj->get (name);
> +  if (!v)
> +    {
> +      out_err = xasprintf ("missing attribute: \"%s\"", name);
> +      return false;
> +    }
> +  out_value = v;
> +  return true;
> +}
> +
> +/* As value::get_value_by_key, but the value must be a number;
> +   if successful, write it as an int to OUT_VALUE.  */
> +
> +bool
> +value::get_int_by_key (const char *name, int &out_value, char *&out_err)
> const
> +{
> +  const json::value *v;
> +  if (!get_value_by_key (name, v, out_err))
> +    return false;
> +  const json::number *n = v->as_number ();
> +  if (!n)
> +    {
> +      out_err = xasprintf ("not a number: \"%s\"", name);
> +      return false;
> +    }
> +  out_value = n->get ();
> +  return true;
> +}
> +
> +/* As value::get_value_by_key, but the value must be a string;
> +   if successful, write it as const char * to OUT_VALUE.  */
> +
> +bool
> +value::get_string_by_key (const char *name, const char *&out_value,
> +                       char *&out_err) const
> +{
> +  const json::value *v;
> +  if (!get_value_by_key (name, v, out_err))
> +    return false;
> +  const json::string *s = v->as_string ();
> +  if (!s)
> +    {
> +      out_err = xasprintf ("not a string: \"%s\"", name);
> +      return false;
> +    }
> +  out_value = s->get_string ();
> +  return true;
> +}
> +
> +/* As value::get_value_by_key, but the value must be an array;
> +   if successful, write it as a json::array * to OUT_VALUE.  */
> +
> +bool
> +value::get_array_by_key (const char *name, const array *&out_value,
> +                      char *&out_err) const
> +{
> +  const json::value *v;
> +  if (!get_value_by_key (name, v, out_err))
> +    return false;
> +  const json::array *arr = v->as_array ();
> +  if (!arr)
> +    {
> +      out_err = xasprintf ("not an array: \"%s\"", name);
> +      return false;
> +    }
> +  out_value = arr;
> +  return true;
> +}
> +
> +/* class json::object, a subclass of json::value, representing
> +   an unordered collection of key/value pairs.  */
> +
> +/* json::object's dtor.  */
> +
> +object::~object ()
> +{
> +  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
> +    {
> +      free (const_cast <char *>((*it).first));
> +      delete ((*it).second);
> +    }
> +}
> +
> +/* Implementation of json::value::print for json::object.  */
> +
> +void
> +object::print (pretty_printer *pp) const
> +{
> +  /* Note that the order is not guaranteed.  */
> +  pp_character (pp, '{');
> +  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
> +    {
> +      if (it != m_map.begin ())
> +     pp_string (pp, ", ");
> +      const char *key = const_cast <char *>((*it).first);
> +      value *value = (*it).second;
> +      pp_printf (pp, "\"%s\": ", key); // FIXME: escaping?
> +      value->print (pp);
> +    }
> +  pp_character (pp, '}');
> +}
> +
> +/* Implementation of json::value::clone for json::object.  */
> +
> +value *
> +object::clone () const
> +{
> +  object *other = new object ();
> +  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
> +    {
> +      const char *key = const_cast <char *>((*it).first);
> +      value *value = (*it).second;
> +      other->set (key, value->clone ());
> +    }
> +  return other;
> +}
> +
> +/* Get the json::value * for KEY, or NULL if the key is not present.  */
> +
> +value *
> +object::get (const char *key) const
> +{
> +  value **slot = const_cast <object*> (this)->m_map.get (key);
> +  if (slot)
> +    return *slot;
> +  return NULL;
> +}
> +
> +/* As object::get (KEY), but return NULL if the value of the key
> +   is the "null" JSON literal.  */
> +
> +value *
> +object::get_if_nonnull (const char *key) const
> +{
> +  value *result = get (key);
> +  if (!result)
> +    return NULL;
> +  if (result->get_kind () == JSON_NULL)
> +    return NULL;
> +  return result;
> +}
> +
> +/* Set the json::value * for KEY, taking ownership of VALUE
> +   (and taking a copy of KEY if necessary).  */
> +
> +void
> +object::set (const char *key, value *v)
> +{
> +  value **ptr = m_map.get (key);
> +  if (ptr)
> +    {
> +      /* If the key is already present, delete the existing value
> +      and overwrite it.  */
> +      delete *ptr;
> +      *ptr = v;
> +    }
> +  else
> +    /* If the key wasn't already present, take a copy of the key,
> +       and store the value.  */
> +    m_map.put (xstrdup (key), v);
> +}
> +
> +/* class json::array, a subclass of json::value, representing
> +   an ordered collection of values.  */
> +
> +/* json::array's dtor.  */
> +
> +array::~array ()
> +{
> +  unsigned i;
> +  value *v;
> +  FOR_EACH_VEC_ELT (m_elements, i, v)
> +    delete v;
> +}
> +
> +/* Implementation of json::value::print for json::array.  */
> +
> +void
> +array::print (pretty_printer *pp) const
> +{
> +  pp_character (pp, '[');
> +  unsigned i;
> +  value *v;
> +  FOR_EACH_VEC_ELT (m_elements, i, v)
> +    {
> +      if (i)
> +     pp_string (pp, ", ");
> +      v->print (pp);
> +    }
> +  pp_character (pp, ']');
> +}
> +
> +/* Implementation of json::value::clone for json::array.  */
> +
> +value *
> +array::clone () const
> +{
> +  array *other = new array ();
> +  unsigned i;
> +  value *v;
> +  FOR_EACH_VEC_ELT (m_elements, i, v)
> +    other->append (v->clone ());
> +  return other;
> +}
> +
> +/* class json::number, a subclass of json::value, wrapping a double.  */
> +
> +/* Implementation of json::value::print for json::number.  */
> +
> +void
> +number::print (pretty_printer *pp) const
> +{
> +  char tmp[1024];
> +  snprintf (tmp, sizeof (tmp), "%g", m_value);
> +  pp_string (pp, tmp);
> +}
> +
> +/* Implementation of json::value::clone for json::number.  */
> +
> +value *
> +number::clone () const
> +{
> +  return new number (m_value);
> +}
> +
> +/* class json::string, a subclass of json::value.  */
> +
> +/* Implementation of json::value::print for json::string.
> +
> +   Write the NUL-terminated bytes of m_utf8 to PP as a double-quoted
> +   JSON string.  The double quote, the backslash and the control
> +   characters that have a short escape (\b, \f, \n, \r, \t) are written
> +   using that escape; any other byte below 0x20 is written as \u00XX,
> +   since JSON does not allow unescaped control characters inside a
> +   string (and lexer::lex_string rejects them).  All other bytes are
> +   written unchanged.  */
> +
> +void
> +string::print (pretty_printer *pp) const
> +{
> +  pp_character (pp, '"');
> +  for (const char *ptr = m_utf8; *ptr; ptr++)
> +    {
> +      char ch = *ptr;
> +      switch (ch)
> +     {
> +     case '"':
> +       pp_string (pp, "\\\"");
> +       break;
> +     case '\\':
> +       /* A backslash is escaped as two backslashes (not as "\n").  */
> +       pp_string (pp, "\\\\");
> +       break;
> +     case '\b':
> +       pp_string (pp, "\\b");
> +       break;
> +     case '\f':
> +       pp_string (pp, "\\f");
> +       break;
> +     case '\n':
> +       pp_string (pp, "\\n");
> +       break;
> +     case '\r':
> +       pp_string (pp, "\\r");
> +       break;
> +     case '\t':
> +       pp_string (pp, "\\t");
> +       break;
> +
> +     default:
> +       /* Compare as unsigned char so that bytes >= 0x80 (UTF-8
> +          lead/continuation bytes) are not mistaken for control
> +          characters where plain char is signed.  */
> +       if ((unsigned char) ch < 0x20)
> +         {
> +           /* "\u00XX" is six characters plus the terminating NUL.  */
> +           char esc[8];
> +           snprintf (esc, sizeof (esc), "\\u%04x", (unsigned char) ch);
> +           pp_string (pp, esc);
> +         }
> +       else
> +         pp_character (pp, ch);
> +     }
> +    }
> +  pp_character (pp, '"');
> +}
> +
> +/* Implementation of json::value::clone for json::string.  */
> +
> +value *
> +string::clone () const
> +{
> +  return new string (m_utf8);
> +}
> +
> +/* class json::literal, a subclass of json::value.  */
> +
> +/* Implementation of json::value::print for json::literal.  */
> +
> +void
> +literal::print (pretty_printer *pp) const
> +{
> +  switch (m_kind)
> +    {
> +    case JSON_TRUE:
> +      pp_string (pp, "true");
> +      break;
> +    case JSON_FALSE:
> +      pp_string (pp, "false");
> +      break;
> +    case JSON_NULL:
> +      pp_string (pp, "null");
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +/* Implementation of json::value::clone for json::literal.  */
> +
> +value *
> +literal::clone () const
> +{
> +  return new literal (m_kind);
> +}
> +
> +
> +/* Declarations relating to parsing JSON, all within an
> +   anonymous namespace.  */
> +
> +namespace {
> +
> +/* A typedef representing a single unicode character.  */
> +
> +typedef unsigned unichar;
> +
> +/* An enum for discriminating different kinds of JSON token.  */
> +
> +enum token_id
> +{
> +  TOK_ERROR,
> +
> +  TOK_EOF,
> +
> +  /* Punctuation.  */
> +  TOK_OPEN_SQUARE,
> +  TOK_OPEN_CURLY,
> +  TOK_CLOSE_SQUARE,
> +  TOK_CLOSE_CURLY,
> +  TOK_COLON,
> +  TOK_COMMA,
> +
> +  /* Literal names.  */
> +  TOK_TRUE,
> +  TOK_FALSE,
> +  TOK_NULL,
> +
> +  TOK_STRING,
> +  TOK_NUMBER
> +};
> +
> +/* Human-readable descriptions of enum token_id.  */
> +
> +static const char *token_id_name[] = {
> +  "error",
> +  "EOF",
> +  "'['",
> +  "'{'",
> +  "']'",
> +  "'}'",
> +  "':'",
> +  "','",
> +  "'true'",
> +  "'false'",
> +  "'null'",
> +  "string",
> +  "number"
> +};
> +
> +/* Tokens within the JSON lexer.  */
> +
> +struct token
> +{
> +  /* The kind of token.  */
> +  enum token_id id;
> +
> +  /* The location of this token within the unicode
> +     character stream.  */
> +  int index;
> +
> +  union
> +  {
> +    /* Value for TOK_ERROR and TOK_STRING.  */
> +    char *string;
> +
> +    /* Value for TOK_NUMBER.  */
> +    double number;
> +  } u;
> +};
> +
> +/* A class for lexing JSON.  */
> +
> +class lexer
> +{
> + public:
> +  lexer ();
> +  ~lexer ();
> +  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
> +
> +  const token *peek ();
> +  void consume ();
> +
> + private:
> +  bool get_char (unichar &out);
> +  void unget_char ();
> +  static void dump_token (FILE *outf, const token *tok);
> +  void lex_token (token *out);
> +  void lex_string (token *out);
> +  void lex_number (token *out, unichar first_char);
> +  bool rest_of_literal (const char *suffix);
> +
> + private:
> +  auto_vec<unichar> m_buffer;
> +  int m_next_char_idx;
> +
> +  static const int MAX_TOKENS = 1;
> +  token m_next_tokens[MAX_TOKENS];
> +  int m_num_next_tokens;
> +};
> +
> +/* A class for parsing JSON.  */
> +
> +class parser
> +{
> + public:
> +  parser (char **err_out);
> +  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
> +  value *parse_value (int depth);
> +  object *parse_object (int depth);
> +  array *parse_array (int depth);
> +
> +  bool seen_error_p () const { return *m_err_out; }
> +  void require_eof ();
> +
> + private:
> +  void require (enum token_id tok_id);
> +  void error_at (int, const char *, ...) ATTRIBUTE_PRINTF_3;
> +
> + private:
> +  lexer m_lexer;
> +  char **m_err_out;
> +};
> +
> +} // anonymous namespace for parsing implementation
> +
> +/* Parser implementation.  */
> +
> +/* lexer's ctor.  */
> +
> +lexer::lexer ()
> +: m_buffer (), m_next_char_idx (0), m_num_next_tokens (0)
> +{
> +}
> +
> +/* lexer's dtor.  */
> +
> +lexer::~lexer ()
> +{
> +  while (m_num_next_tokens > 0)
> +    consume ();
> +}
> +
> +/* Peek the next token.  */
> +
> +const token *
> +lexer::peek ()
> +{
> +  if (m_num_next_tokens == 0)
> +    {
> +      lex_token (&m_next_tokens[0]);
> +      m_num_next_tokens++;
> +    }
> +  return &m_next_tokens[0];
> +}
> +
> +/* Consume the next token.  */
> +
> +void
> +lexer::consume ()
> +{
> +  if (m_num_next_tokens == 0)
> +    peek ();
> +
> +  gcc_assert (m_num_next_tokens > 0);
> +  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
> +
> +  if (0)
> +    {
> +      fprintf (stderr, "consuming token: ");
> +      dump_token (stderr, &m_next_tokens[0]);
> +      fprintf (stderr, "\n");
> +    }
> +
> +  if (m_next_tokens[0].id == TOK_ERROR
> +      || m_next_tokens[0].id == TOK_STRING)
> +    free (m_next_tokens[0].u.string);
> +
> +  m_num_next_tokens--;
> +  memmove (&m_next_tokens[0], &m_next_tokens[1],
> +        sizeof (token) * m_num_next_tokens);
> +}
> +
> +/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
> +   buffer.  */
> +
> +bool
> +lexer::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
> +{
> +  /* FIXME: adapted from charset.c:one_utf8_to_cppchar.  */
> +  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
> +  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
> +
> +  const uchar *inbuf = (const unsigned char *) (utf8_buf);
> +  const uchar **inbufp = &inbuf;
> +  size_t *inbytesleftp = &length;
> +
> +  while (length > 0)
> +    {
> +      unichar c;
> +      const uchar *inbuf = *inbufp;
> +      size_t nbytes, i;
> +
> +      c = *inbuf;
> +      if (c < 0x80)
> +     {
> +       m_buffer.safe_push (c);
> +       *inbytesleftp -= 1;
> +       *inbufp += 1;
> +       continue;
> +     }
> +
> +      /* The number of leading 1-bits in the first byte indicates how many
> +      bytes follow.  */
> +      for (nbytes = 2; nbytes < 7; nbytes++)
> +     if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
> +       goto found;
> +      *err_out = xstrdup ("ill-formed UTF-8 sequence");
> +      return false;
> +    found:
> +
> +      if (*inbytesleftp < nbytes)
> +     {
> +       *err_out = xstrdup ("ill-formed UTF-8 sequence");
> +       return false;
> +     }
> +
> +      c = (c & masks[nbytes-1]);
> +      inbuf++;
> +      for (i = 1; i < nbytes; i++)
> +     {
> +       unichar n = *inbuf++;
> +       if ((n & 0xC0) != 0x80)
> +         {
> +           *err_out = xstrdup ("ill-formed UTF-8 sequence");
> +           return false;
> +         }
> +       c = ((c << 6) + (n & 0x3F));
> +     }
> +
> +      /* Make sure the shortest possible encoding was used.  */
> +      if ((   c <=      0x7F && nbytes > 1)
> +       || (c <=     0x7FF && nbytes > 2)
> +       || (c <=    0xFFFF && nbytes > 3)
> +       || (c <=  0x1FFFFF && nbytes > 4)
> +       || (c <= 0x3FFFFFF && nbytes > 5))
> +     {
> +       *err_out = xstrdup ("ill-formed UTF-8:"
> +                           " shortest possible encoding not used");
> +       return false;
> +     }
> +
> +      /* Make sure the character is valid.  */
> +      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
> +     {
> +       *err_out = xstrdup ("ill-formed UTF-8: invalid character");
> +       return false;
> +     }
> +
> +      m_buffer.safe_push (c);
> +      *inbufp = inbuf;
> +      *inbytesleftp -= nbytes;
> +    }
> +  return true;
> +}
> +
> +/* Attempt to get the next unicode character from this lexer's buffer.
> +   If successful, write it to OUT and return true.
> +   Otherwise, return false.  */
> +
> +bool
> +lexer::get_char (unichar &out)
> +{
> +  if (m_next_char_idx >= (int)m_buffer.length ())
> +    return false;
> +
> +  out = m_buffer[m_next_char_idx++];
> +  return true;
> +}
> +
> +/* Step back one character in this lexer's buffer, so that the next
> +   call to get_char returns the most recently read character again.
> +   No bounds check is performed, so this is only meaningful directly
> +   after a successful get_char.  */
> +
> +void
> +lexer::unget_char ()
> +{
> +  --m_next_char_idx;
> +}
> +
> +/* Print a textual representation of TOK to OUTF.
> +   This is intended for debugging the lexer and parser,
> +   rather than for user-facing output.  */
> +
> +void
> +lexer::dump_token (FILE *outf, const token *tok)
> +{
> +  switch (tok->id)
> +    {
> +    case TOK_ERROR:
> +      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
> +      break;
> +
> +    case TOK_EOF:
> +      fprintf (outf, "TOK_EOF");
> +      break;
> +
> +    case TOK_OPEN_SQUARE:
> +      fprintf (outf, "TOK_OPEN_SQUARE");
> +      break;
> +
> +    case TOK_OPEN_CURLY:
> +      fprintf (outf, "TOK_OPEN_CURLY");
> +      break;
> +
> +    case TOK_CLOSE_SQUARE:
> +      fprintf (outf, "TOK_CLOSE_SQUARE");
> +      break;
> +
> +    case TOK_CLOSE_CURLY:
> +      fprintf (outf, "TOK_CLOSE_CURLY");
> +      break;
> +
> +    case TOK_COLON:
> +      fprintf (outf, "TOK_COLON");
> +      break;
> +
> +    case TOK_COMMA:
> +      fprintf (outf, "TOK_COMMA");
> +      break;
> +
> +    case TOK_TRUE:
> +      fprintf (outf, "TOK_TRUE");
> +      break;
> +
> +    case TOK_FALSE:
> +      fprintf (outf, "TOK_FALSE");
> +      break;
> +
> +    case TOK_NULL:
> +      fprintf (outf, "TOK_NULL");
> +      break;
> +
> +    case TOK_STRING:
> +      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
> +      break;
> +
> +    case TOK_NUMBER:
> +      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
> +      break;
> +
> +    default:
> +      gcc_unreachable ();
> +      break;
> +    }
> +}
> +
> +/* Attempt to lex the input buffer, writing the next token to OUT.
> +   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
> +
> +void
> +lexer::lex_token (token *out)
> +{
> +  /* Skip to next non-whitespace char.  */
> +  unichar next_char;
> +  while (1)
> +    {
> +      out->index = m_next_char_idx;
> +      if (!get_char (next_char))
> +     {
> +       out->id = TOK_EOF;
> +       return;
> +     }
> +      if (next_char != ' '
> +       && next_char != '\t'
> +       && next_char != '\n'
> +       && next_char != '\r')
> +     break;
> +    }
> +
> +  switch (next_char)
> +    {
> +    case '[':
> +      out->id = TOK_OPEN_SQUARE;
> +      break;
> +
> +    case '{':
> +      out->id = TOK_OPEN_CURLY;
> +      break;
> +
> +    case ']':
> +      out->id = TOK_CLOSE_SQUARE;
> +      break;
> +
> +    case '}':
> +      out->id = TOK_CLOSE_CURLY;
> +      break;
> +
> +    case ':':
> +      out->id = TOK_COLON;
> +      break;
> +
> +    case ',':
> +      out->id = TOK_COMMA;
> +      break;
> +
> +    case '"':
> +      lex_string (out);
> +      break;
> +
> +    case '-':
> +    case '0':
> +    case '1':
> +    case '2':
> +    case '3':
> +    case '4':
> +    case '5':
> +    case '6':
> +    case '7':
> +    case '8':
> +    case '9':
> +      lex_number (out, next_char);
> +      break;
> +
> +    case 't':
> +      /* Handle literal "true".  */
> +      if (rest_of_literal ("rue"))
> +     {
> +       out->id = TOK_TRUE;
> +       break;
> +     }
> +      else
> +     goto err;
> +
> +    case 'f':
> +      /* Handle literal "false".  */
> +      if (rest_of_literal ("alse"))
> +     {
> +       out->id = TOK_FALSE;
> +       break;
> +     }
> +      else
> +     goto err;
> +
> +    case 'n':
> +      /* Handle literal "null".  */
> +      if (rest_of_literal ("ull"))
> +     {
> +       out->id = TOK_NULL;
> +       break;
> +     }
> +      else
> +     goto err;
> +
> +    err:
> +    default:
> +      out->id = TOK_ERROR;
> +      out->u.string = xasprintf ("unexpected character: %c", next_char);
> +      break;
> +    }
> +}
> +
> +/* Having consumed an open-quote character from the lexer's buffer,
> +   attempt to lex the rest of a JSON string, writing the result to OUT
> +   (or TOK_ERROR if an error occurred).
> +   (ECMA-404 section 9; RFC 7159 section 7).  */
> +
> +void
> +lexer::lex_string (token *out)
> +{
> +  auto_vec<unichar> content;
> +  bool still_going = true;
> +  while (still_going)
> +    {
> +      unichar uc;
> +      if (!get_char (uc))
> +     {
> +       out->id = TOK_ERROR;
> +       out->u.string = xstrdup ("EOF within string");
> +       return;
> +     }
> +      switch (uc)
> +     {
> +     case '"':
> +       still_going = false;
> +       break;
> +     case '\\':
> +       {
> +         unichar next_char;
> +         if (!get_char (next_char))
> +           {
> +             out->id = TOK_ERROR;
> +             out->u.string = xstrdup ("EOF within string");;
> +             return;
> +           }
> +         switch (next_char)
> +           {
> +           case '"':
> +           case '\\':
> +           case '/':
> +             content.safe_push (next_char);
> +             break;
> +
> +           case 'b':
> +             content.safe_push ('\b');
> +             break;
> +
> +           case 'f':
> +             content.safe_push ('\f');
> +             break;
> +
> +           case 'n':
> +             content.safe_push ('\n');
> +             break;
> +
> +           case 'r':
> +             content.safe_push ('\r');
> +             break;
> +
> +           case 't':
> +             content.safe_push ('\t');
> +             break;
> +
> +           case 'u':
> +             {
> +               unichar result = 0;
> +               for (int i = 0; i < 4; i++)
> +                 {
> +                   unichar hexdigit;
> +                   if (!get_char (hexdigit))
> +                     {
> +                       out->id = TOK_ERROR;
> +                       out->u.string = xstrdup ("EOF within string");
> +                       return;
> +                     }
> +                   result <<= 4;
> +                   if (hexdigit >= '0' && hexdigit <= '9')
> +                     result += hexdigit - '0';
> +                   else if (hexdigit >= 'a' && hexdigit <= 'f')
> +                     result += (hexdigit - 'a') + 10;
> +                   else if (hexdigit >= 'A' && hexdigit <= 'F')
> +                     result += (hexdigit - 'A') + 10;
> +                   else
> +                     {
> +                       out->id = TOK_ERROR;
> +                       out->u.string = xstrdup ("bogus hex char");
> +                       return;
> +                     }
> +                 }
> +               content.safe_push (result);
> +             }
> +             break;
> +
> +           default:
> +             out->id = TOK_ERROR;
> +             out->u.string = xstrdup ("unrecognized escape char");
> +             return;
> +           }
> +       }
> +       break;
> +
> +     default:
> +       /* Reject unescaped control characters U+0000 through U+001F
> +          (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1).  */
> +       if (uc <= 0x1f)
> +         {
> +             out->id = TOK_ERROR;
> +             out->u.string = xstrdup ("unescaped control char");
> +             return;
> +         }
> +
> +       /* Otherwise, add regular unicode code point.  */
> +       content.safe_push (uc);
> +       break;
> +     }
> +    }
> +
> +  out->id = TOK_STRING;
> +
> +  auto_vec<char> utf8_buf;
> +  // FIXME: adapted from libcpp/charset.c:one_cppchar_to_utf8
> +  for (unsigned i = 0; i < content.length (); i++)
> +    {
> +      static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
> };
> +      static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE
> };
> +      size_t nbytes;
> +      uchar buf[6], *p = &buf[6];
> +      unichar c = content[i];
> +
> +      nbytes = 1;
> +      if (c < 0x80)
> +     *--p = c;
> +      else
> +     {
> +       do
> +         {
> +           *--p = ((c & 0x3F) | 0x80);
> +           c >>= 6;
> +           nbytes++;
> +         }
> +       while (c >= 0x3F || (c & limits[nbytes-1]));
> +       *--p = (c | masks[nbytes-1]);
> +     }
> +
> +      while (p < &buf[6])
> +     utf8_buf.safe_push (*p++);
> +    }
> +
> +  out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
> +  for (unsigned i = 0; i < utf8_buf.length (); i++)
> +    out->u.string[i] = utf8_buf[i];
> +  out->u.string[utf8_buf.length ()] = '\0';
> +
> +  // FIXME: leaks?  have a json_context do the allocation
> +}
> +
> +/* Having consumed FIRST_CHAR, an initial digit or '-' character from
> +   the lexer's buffer, attempt to lex the rest of a JSON number, writing
> +   the result to OUT (or TOK_ERROR if an error occurred).
> +   (ECMA-404 section 8; RFC 7159 section 6).  */
> +
> +void
> +lexer::lex_number (token *out, unichar first_char)
> +{
> +  bool negate = false;
> +  double value = 0.0;
> +  if (first_char == '-')
> +    {
> +      negate = true;
> +      if (!get_char (first_char))
> +     {
> +       out->id = TOK_ERROR;
> +       out->u.string = xstrdup ("expected digit");
> +       return;
> +     }
> +    }
> +
> +  if (first_char == '0')
> +    value = 0.0;
> +  else if (!ISDIGIT (first_char))
> +    {
> +      out->id = TOK_ERROR;
> +      out->u.string = xstrdup ("expected digit");
> +      return;
> +    }
> +  else
> +    {
> +      /* Got a nonzero digit; expect zero or more digits.  */
> +      value = first_char - '0';
> +      while (1)
> +     {
> +       unichar uc;
> +       if (!get_char (uc))
> +         break;
> +       if (ISDIGIT (uc))
> +         {
> +           value *= 10;
> +           value += uc -'0';
> +           continue;
> +         }
> +       else
> +         {
> +           unget_char ();
> +           break;
> +         }
> +     }
> +    }
> +
> +  /* Optional '.', followed by one or more decimals.  */
> +  unichar next_char;
> +  if (get_char (next_char))
> +    {
> +      if (next_char == '.')
> +     {
> +       /* Parse decimal digits.  */
> +       bool had_digit = false;
> +       // FIXME: does this lose too much precision?
> +       double digit_factor = 0.1;
> +       while (get_char (next_char))
> +         {
> +           if (!ISDIGIT (next_char))
> +             {
> +               unget_char ();
> +               break;
> +             }
> +           value += (next_char - '0') * digit_factor;
> +           digit_factor *= 0.1;
> +           had_digit = true;
> +         }
> +       if (!had_digit)
> +         {
> +           out->id = TOK_ERROR;
> +           out->u.string = xstrdup ("expected digit");
> +           return;
> +         }
> +     }
> +      else
> +     unget_char ();
> +    }
> +
> +  /* Parse 'e' and 'E'.  */
> +  unichar exponent_char;
> +  if (get_char (exponent_char))
> +    {
> +      if (exponent_char == 'e' || exponent_char == 'E')
> +     {
> +       /* Optional +/-.  */
> +       unichar sign_char;
> +       int exponent = 0;
> +       bool negate_exponent = false;
> +       bool had_exponent_digit = false;
> +       if (!get_char (sign_char))
> +         {
> +           out->id = TOK_ERROR;
> +           out->u.string = xstrdup ("EOF within exponent");
> +           return;
> +         }
> +       if (sign_char == '-')
> +         negate_exponent = true;
> +       else if (sign_char == '+')
> +         ;
> +       else if (ISDIGIT (sign_char))
> +         {
> +           exponent = sign_char - '0';
> +           had_exponent_digit = true;
> +         }
> +       else
> +         {
> +           out->id = TOK_ERROR;
> +           out->u.string
> +             = xstrdup ("expected '-','+' or digit within exponent");
> +           return;
> +         }
> +
> +       /* One or more digits (we might have seen the digit above,
> +          though).  */
> +       while (1)
> +         {
> +           unichar uc;
> +           if (!get_char (uc))
> +             break;
> +           if (ISDIGIT (uc))
> +             {
> +               exponent *= 10;
> +               exponent += uc -'0';
> +               had_exponent_digit = true;
> +               continue;
> +             }
> +           else
> +             {
> +               unget_char ();
> +               break;
> +             }
> +         }
> +       if (!had_exponent_digit)
> +         {
> +           out->id = TOK_ERROR;
> +           out->u.string = xstrdup ("expected digit within exponent");
> +           return;
> +         }
> +       if (negate_exponent)
> +         exponent = -exponent;
> +       /* FIXME: better way to do this?  */
> +       value = value * pow (10, exponent);
> +     }
> +      else
> +     unget_char ();
> +    }
> +
> +  if (negate)
> +    value = -value;
> +
> +  out->id = TOK_NUMBER;
> +  out->u.number = value;
> +}
> +
> +/* Determine if the next characters to be lexed match SUFFIX, a
> +   nil-terminated string which must be pure ASCII.
> +   If all of SUFFIX matches, consume the characters (by advancing
> +   m_next_char_idx past them) and return true.
> +   Otherwise (on a mismatch, or if the buffer ends before SUFFIX does),
> +   return false, leaving m_next_char_idx unchanged.  */
> +
> +bool
> +lexer::rest_of_literal (const char *suffix)
> +{
> +  int suffix_idx = 0;
> +  int buf_idx = m_next_char_idx;
> +  while (1)
> +    {
> +      if (suffix[suffix_idx] == '\0')
> +     {
> +       m_next_char_idx += suffix_idx;
> +       return true;
> +     }
> +      if (buf_idx >= (int)m_buffer.length ())
> +     return false;
> +      /* Each byte of SUFFIX is cast to unichar for the comparison;
> +         this assumes that suffix is ASCII.  */
> +      if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
> +     return false;
> +      buf_idx++;
> +      suffix_idx++;
> +    }
> +}
> +
> +/* parser's ctor.  ERR_OUT must be non-NULL and must point to a NULL
> +   pointer; it is stored as m_err_out, through which error_at records
> +   the first error message.  */
> +
> +parser::parser (char **err_out)
> +: m_lexer (), m_err_out (err_out)
> +{
> +  gcc_assert (err_out);
> +  gcc_assert (*err_out == NULL);
> +  *err_out = NULL;
> +}
> +
> +/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
> +   lexer's buffer.
> +   This forwards to the lexer's add_utf8, passing ERR_OUT through and
> +   returning its result (json::parse_utf8_string treats a false result
> +   as failure).  */
> +
> +bool
> +parser::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
> +{
> +  return m_lexer.add_utf8 (length, utf8_buf, err_out);
> +}
> +
> +/* Parse a JSON value (object, array, number, string, or literal).
> +   (ECMA-404 section 5; RFC 7159 section 3).
> +
> +   DEPTH is the nesting depth of this value; it is passed unchanged to
> +   parse_object and parse_array, which parse their children at
> +   DEPTH + 1.
> +
> +   Return a value allocated with "new".  If the depth limit has been
> +   reached, or the next token cannot start a value, report an error
> +   via error_at and return NULL.  */
> +
> +value *
> +parser::parse_value (int depth)
> +{
> +  const token *tok = m_lexer.peek ();
> +
> +  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
> +     states: "An implementation may set limits on the maximum depth
> +     of nesting.".
> +
> +     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
> +     parse_object, and parse_array into a single function with a vec of
> +     state).  */
> +  const int MAX_DEPTH = 100;
> +  if (depth >= MAX_DEPTH)
> +    {
> +      error_at (tok->index, "maximum nesting depth exceeded: %i",
> MAX_DEPTH);
> +      return NULL;
> +    }
> +
> +  switch (tok->id)
> +    {
> +    case TOK_OPEN_CURLY:
> +      return parse_object (depth);
> +
> +    case TOK_STRING:
> +      {
> +     string *result = new string (tok->u.string);
> +     m_lexer.consume ();
> +     return result;
> +      }
> +
> +    case TOK_OPEN_SQUARE:
> +      return parse_array (depth);
> +
> +    case TOK_NUMBER:
> +      {
> +     number *result = new number (tok->u.number);
> +     m_lexer.consume ();
> +     return result;
> +      }
> +
> +    case TOK_TRUE:
> +      {
> +     literal *result = new literal (JSON_TRUE);
> +     m_lexer.consume ();
> +     return result;
> +      }
> +
> +    case TOK_FALSE:
> +      {
> +     literal *result = new literal (JSON_FALSE);
> +     m_lexer.consume ();
> +     return result;
> +      }
> +
> +    case TOK_NULL:
> +      {
> +     literal *result = new literal (JSON_NULL);
> +     m_lexer.consume ();
> +     return result;
> +      }
> +
> +    default:
> +      error_at (tok->index, "unexpected token: %s",
> token_id_name[tok->id]);
> +      return NULL;
> +    }
> +}
> +
> +/* Parse a JSON object.
> +   (ECMA-404 section 6; RFC 7159 section 4).
> +
> +   DEPTH is the nesting depth of the object itself; the values of its
> +   members are parsed at DEPTH + 1.
> +
> +   Always return an object allocated with "new".  If an error is
> +   reported part-way through, the object returned holds whichever
> +   members were added before the error; callers should check
> +   seen_error_p.  */
> +
> +object *
> +parser::parse_object (int depth)
> +{
> +  require (TOK_OPEN_CURLY);
> +
> +  object *result = new object ();
> +
> +  const token *tok = m_lexer.peek ();
> +  if (tok->id == TOK_CLOSE_CURLY)
> +    {
> +      require (TOK_CLOSE_CURLY);
> +      return result;
> +    }
> +  if (tok->id != TOK_STRING)
> +    {
> +      error_at (tok->index, "expected string for object key");
> +      return result;
> +    }
> +  while (!seen_error_p ())
> +    {
> +      tok = m_lexer.peek ();
> +      if (tok->id != TOK_STRING)
> +     {
> +       error_at (tok->index, "expected string for object key");
> +       return result;
> +     }
> +      char *key = xstrdup (tok->u.string);
> +      m_lexer.consume ();
> +
> +      require (TOK_COLON);
> +
> +      value *v = parse_value (depth + 1);
> +      if (!v)
> +     {
> +       free (key);
> +       return result;
> +     }
> +      /* We don't enforce uniqueness for keys.
> +         NOTE(review): KEY is freed immediately after the call to set,
> +         so set presumably takes its own copy of it -- confirm.  */
> +      result->set (key, v);
> +      free (key);
> +
> +      tok = m_lexer.peek ();
> +      if (tok->id == TOK_COMMA)
> +     {
> +       m_lexer.consume ();
> +       continue;
> +     }
> +      else
> +     {
> +       require (TOK_CLOSE_CURLY);
> +       break;
> +     }
> +    }
> +  return result;
> +}
> +
> +/* Parse a JSON array.
> +   (ECMA-404 section 7; RFC 7159 section 5).
> +
> +   DEPTH is the nesting depth of the array itself; its elements are
> +   parsed at DEPTH + 1.
> +
> +   Always return an array allocated with "new".  If parse_value fails
> +   for an element, the array returned holds the elements appended
> +   before the failure; callers should check seen_error_p.  */
> +
> +array *
> +parser::parse_array (int depth)
> +{
> +  require (TOK_OPEN_SQUARE);
> +
> +  array *result = new array ();
> +
> +  const token *tok = m_lexer.peek ();
> +  if (tok->id == TOK_CLOSE_SQUARE)
> +    {
> +      m_lexer.consume ();
> +      return result;
> +    }
> +
> +  while (!seen_error_p ())
> +    {
> +      value *v = parse_value (depth + 1);
> +      if (!v)
> +     return result;
> +
> +      result->append (v);
> +
> +      tok = m_lexer.peek ();
> +      if (tok->id == TOK_COMMA)
> +     {
> +       m_lexer.consume ();
> +       continue;
> +     }
> +      else
> +     {
> +       require (TOK_CLOSE_SQUARE);
> +       break;
> +     }
> +    }
> +
> +  return result;
> +}
> +
> +/* Require that the next token is TOK_EOF, so that any surplus input
> +   is reported as an error (via require).  */
> +
> +void
> +parser::require_eof ()
> +{
> +  require (TOK_EOF);
> +}
> +
> +/* Consume the next token, issuing an error if it is not of kind TOK_ID.
> +   The token is consumed even if it does not match.  If the token seen
> +   is a TOK_ERROR, the message it carries (tok->u.string) is included
> +   in the error.  */
> +
> +void
> +parser::require (enum token_id tok_id)
> +{
> +  const token *tok = m_lexer.peek ();
> +  if (tok->id != tok_id)
> +    {
> +      if (tok->id == TOK_ERROR)
> +     error_at (tok->index, "expected %s; got bad token: %s",
> +               token_id_name[tok_id], tok->u.string);
> +      else
> +     error_at (tok->index, "expected %s; got %s", token_id_name[tok_id],
> +               token_id_name[tok->id]);
> +    }
> +  m_lexer.consume ();
> +}
> +
> +/* Issue a parsing error.  FMT and the variadic arguments are formatted
> +   printf-style, and the result is prefixed with "error at index INDEX: ".
> +
> +   If this is the first error that has occurred on the parser, store it
> +   within the parser's m_err_out (the buffer will eventually need to be
> +   freed by the caller of the parser).
> +   Otherwise the error is discarded.
> +
> +   The fprintf guarded by "if (0)" never runs; presumably it is kept as
> +   a debugging aid.
> +
> +   TODO: maybe provide a callback so that client code can print all
> +   errors?  */
> +
> +void
> +parser::error_at (int index, const char *fmt, ...)
> +{
> +  va_list ap;
> +  va_start (ap, fmt);
> +  char *formatted = xvasprintf (fmt, ap);
> +  va_end (ap);
> +
> +  char *msg_with_index = xasprintf ("error at index %i: %s",
> +                                 index, formatted);
> +  free (formatted);
> +
> +  if (0)
> +    fprintf (stderr, "%s\n", msg_with_index);
> +  if (*m_err_out == NULL)
> +    *m_err_out = msg_with_index;
> +  else
> +    free (msg_with_index);
> +}
> +
> +/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
> +   of the given LENGTH.
> +   ERR_OUT must be non-NULL, and *ERR_OUT must be NULL on entry.
> +   The buffer must contain a single JSON value followed by EOF;
> +   surplus input after the value is an error.
> +   If successful, return a non-NULL json::value *, which the caller
> +   must delete.
> +   If there was a problem, return NULL and write an error
> +   message to err_out, which must be freed by the caller; any
> +   partially-built result is deleted here.  */
> +
> +value *
> +json::parse_utf8_string (size_t length, const char *utf8_buf,
> +                      char **err_out)
> +{
> +  gcc_assert (err_out);
> +  gcc_assert (*err_out == NULL);
> +
> +  parser p (err_out);
> +  if (!p.add_utf8 (length, utf8_buf, err_out))
> +    return NULL;
> +  value *result = p.parse_value (0);
> +  if (!p.seen_error_p ())
> +    p.require_eof ();
> +  if (p.seen_error_p ())
> +    {
> +      gcc_assert (*err_out);
> +      delete result;
> +      return NULL;
> +    }
> +  return result;
> +}
> +
> +/* Attempt to parse the nil-terminated UTF-8 encoded buffer at
> +   UTF8.  The length is determined with strlen, and the work is
> +   done by the overload taking an explicit length.
> +   If successful, return a non-NULL json::value *.
> +   If there was a problem, return NULL and write an error
> +   message to err_out, which must be freed by the caller.  */
> +
> +value *
> +json::parse_utf8_string (const char *utf8, char **err_out)
> +{
> +  return parse_utf8_string (strlen (utf8), utf8, err_out);
> +}
> +
> +
> +#if CHECKING_P
> +
> +namespace selftest {
> +
> +/* Selftests.  */
> +
> +/* Verify that JV->to_str () equals EXPECTED_JSON.
> +   The buffer returned by to_str is freed here; JV itself is not.  */
> +
> +static void
> +assert_to_str_eq (const char *expected_json, json::value *jv)
> +{
> +  char *json = jv->to_str ();
> +  ASSERT_STREQ (expected_json, json);
> +  free (json);
> +}
> +
> +/* Verify that JSON strings can be parsed and dumped: a plain ASCII
> +   string (which is also cloned), a string containing escaped double
> +   quotes, a string of non-ASCII UTF-8, and the same non-ASCII string
> +   written with \u escapes.  */
> +
> +static void
> +test_parse_string ()
> +{
> +  char *err = NULL;
> +  json::value *jv = parse_utf8_string ("\"foo\"", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_STRING, jv->get_kind ());
> +  ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
> +  assert_to_str_eq ("\"foo\"", jv);
> +
> +  json::value *clone = jv->clone ();
> +  ASSERT_EQ (JSON_STRING, clone->get_kind ());
> +  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
> +  assert_to_str_eq ("\"foo\"", clone);
> +  delete clone;
> +  delete jv;
> +
> +  const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
> +  jv = parse_utf8_string (contains_quotes, &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_STRING, jv->get_kind ());
> +  ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string
> ());
> +  assert_to_str_eq (contains_quotes, jv);
> +  delete jv;
> +
> +  /* Test of non-ASCII input.  This string is the Japanese word
> +     "mojibake", written as C octal-escaped UTF-8.  */
> +  const char *mojibake = (/* Opening quote.  */
> +                       "\""
> +                       /* U+6587 CJK UNIFIED IDEOGRAPH-6587
> +                          UTF-8: 0xE6 0x96 0x87
> +                          C octal escaped UTF-8: \346\226\207.  */
> +                       "\346\226\207"
> +                       /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
> +                          UTF-8: 0xE5 0xAD 0x97
> +                          C octal escaped UTF-8: \345\255\227.  */
> +                       "\345\255\227"
> +                      /* U+5316 CJK UNIFIED IDEOGRAPH-5316
> +                           UTF-8: 0xE5 0x8C 0x96
> +                           C octal escaped UTF-8: \345\214\226.  */
> +                       "\345\214\226"
> +                      /* U+3051 HIRAGANA LETTER KE
> +                           UTF-8: 0xE3 0x81 0x91
> +                           C octal escaped UTF-8: \343\201\221.  */
> +                       "\343\201\221"
> +                       /* Closing quote.  */
> +                       "\"");
> +  jv = parse_utf8_string (mojibake, &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_STRING, jv->get_kind ());
> +  /* Result of get_string should be UTF-8 encoded, without quotes.  */
> +  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226"
> "\343\201\221",
> +             ((json::string *)jv)->get_string ());
> +  /* Result of dump should be UTF-8 encoded, with quotes.  */
> +  assert_to_str_eq (mojibake, jv);
> +  delete jv;
> +
> +  /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
> +  const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
> +  jv = parse_utf8_string (escaped_unicode, &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_STRING, jv->get_kind ());
> +  /* Result of get_string should be UTF-8 encoded, without quotes.  */
> +  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226"
> "\343\201\221",
> +             ((json::string *)jv)->get_string ());
> +  /* Result of dump should be UTF-8 encoded, with quotes.  */
> +  assert_to_str_eq (mojibake, jv);
> +  delete jv;
> +}
> +
> +/* Verify that numbers are parsed and dumped: a positive integer (which
> +   is also cloned), a negative integer, a decimal, and numbers with
> +   exponents (with an explicit '+', with no sign, and negative).  */
> +
> +static void
> +test_parse_number ()
> +{
> +  json::value *jv, *clone;
> +
> +  char *err = NULL;
> +  jv = parse_utf8_string ("42", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (42.0, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("42", jv);
> +  clone = jv->clone ();
> +  ASSERT_EQ (JSON_NUMBER, clone->get_kind ());
> +  delete clone;
> +  delete jv;
> +
> +  /* Negative number.  */
> +  jv = parse_utf8_string ("-17", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (-17.0, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("-17", jv);
> +  delete jv;
> +
> +  /* Decimal.  */
> +  jv = parse_utf8_string ("3.141", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("3.141", jv);
> +  delete jv;
> +
> +  /* Exponents.  */
> +  jv = parse_utf8_string ("3.141e+0", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("3.141", jv);
> +  delete jv;
> +
> +  jv = parse_utf8_string ("42e2", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (4200, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("4200", jv);
> +  delete jv;
> +
> +  jv = parse_utf8_string ("42e-1", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
> +  ASSERT_EQ (4.2, ((json::number *)jv)->get ());
> +  assert_to_str_eq ("4.2", jv);
> +  delete jv;
> +
> +}
> +
> +/* Verify that an array of the ten numbers 0-9 is parsed with the
> +   expected length, element kinds and element values, that it dumps
> +   back to the same text, and that the same holds for a clone of it.  */
> +
> +static void
> +test_parse_array ()
> +{
> +  json::value *jv, *clone;
> +
> +  char *err = NULL;
> +  jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_EQ (JSON_ARRAY, jv->get_kind ());
> +  json::array *arr = static_cast <json::array *> (jv);
> +  ASSERT_EQ (10, arr->get_length ());
> +  for (int i = 0; i < 10; i++)
> +    {
> +      json::value *element = arr->get (i);
> +      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
> +      ASSERT_EQ (i, ((json::number *)element)->get ());
> +    }
> +  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv);
> +
> +  clone = jv->clone ();
> +  ASSERT_EQ (JSON_ARRAY, clone->get_kind ());
> +  arr = static_cast <json::array *> (clone);
> +  ASSERT_EQ (10, arr->get_length ());
> +  for (int i = 0; i < 10; i++)
> +    {
> +      json::value *element = arr->get (i);
> +      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
> +      ASSERT_EQ (i, ((json::number *)element)->get ());
> +    }
> +  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone);
> +  delete clone;
> +
> +  delete jv;
> +}
> +
> +/* Verify that an object with a string member and an array member is
> +   parsed, that both members can be looked up by key with the expected
> +   kinds and contents, and that a clone of the object is also an
> +   object with a string "foo" member.  */
> +
> +static void
> +test_parse_object ()
> +{
> +  char *err = NULL;
> +  json::value *jv
> +    = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
> +  json::object *jo = static_cast <json::object *> (jv);
> +
> +  json::value *foo_value = jo->get ("foo");
> +  ASSERT_TRUE (foo_value != NULL);
> +  ASSERT_EQ (JSON_STRING, foo_value->get_kind ());
> +  ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ());
> +
> +  json::value *baz_value = jo->get ("baz");
> +  ASSERT_TRUE (baz_value != NULL);
> +  ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ());
> +
> +  json::array *baz_array = (json::array *)baz_value;
> +  ASSERT_EQ (2, baz_array->get_length ());
> +  ASSERT_EQ (42, baz_array->get (0)->as_number ()->get ());
> +  ASSERT_EQ (JSON_NULL, baz_array->get (1)->get_kind ());
> +
> +  // TODO: error-handling
> +  // TODO: partial document
> +
> +  /* We can't use assert_to_str_eq since ordering is not guaranteed.  */
> +
> +  json::value *clone = jv->clone ();
> +  ASSERT_EQ (JSON_OBJECT, clone->get_kind ());
> +  ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ());
> +  delete clone;
> +
> +  delete jv;
> +}
> +
> +/* Verify that the literals "true", "false" and "null" are parsed,
> +   dumped, and are clonable, with get_kind giving JSON_TRUE, JSON_FALSE
> +   and JSON_NULL respectively for both the parsed value and its
> +   clone.  */
> +
> +static void
> +test_parse_literals ()
> +{
> +  json::value *jv, *clone;
> +  char *err = NULL;
> +  jv = parse_utf8_string ("true", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  ASSERT_EQ (JSON_TRUE, jv->get_kind ());
> +  assert_to_str_eq ("true", jv);
> +  clone = jv->clone ();
> +  ASSERT_EQ (JSON_TRUE, clone->get_kind ());
> +  delete clone;
> +  delete jv;
> +
> +  jv = parse_utf8_string ("false", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  ASSERT_EQ (JSON_FALSE, jv->get_kind ());
> +  assert_to_str_eq ("false", jv);
> +  clone = jv->clone ();
> +  ASSERT_EQ (JSON_FALSE, clone->get_kind ());
> +  delete clone;
> +  delete jv;
> +
> +  jv = parse_utf8_string ("null", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  ASSERT_EQ (JSON_NULL, jv->get_kind ());
> +  assert_to_str_eq ("null", jv);
> +  clone = jv->clone ();
> +  ASSERT_EQ (JSON_NULL, clone->get_kind ());
> +  delete clone;
> +  delete jv;
> +}
> +
> +/* Verify that a JSON-RPC 2.0 style request (an object with string,
> +   array and number members) parses without error.  Only success is
> +   checked; the contents of the result are not inspected.  */
> +
> +static void
> +test_parse_jsonrpc ()
> +{
> +  char *err = NULL;
> +  const char *request
> +    = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\","
> +       " \"params\": [42, 23], \"id\": 1}");
> +  json::value *jv = parse_utf8_string (request, &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  delete jv;
> +}
> +
> +/* Verify that "{}" parses without error to an object, and that the
> +   object dumps back as "{}".  */
> +
> +static void
> +test_parse_empty_object ()
> +{
> +  char *err = NULL;
> +  json::value *jv = parse_utf8_string ("{}", &err);
> +  ASSERT_EQ (NULL, err);
> +  ASSERT_TRUE (jv != NULL);
> +  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
> +  assert_to_str_eq ("{}", jv);
> +  delete jv;
> +}
> +
> +/* Verify that parsing the empty string fails: NULL is returned, and
> +   the error message reports an unexpected EOF token at index 0.  */
> +
> +static void
> +test_error_empty_string ()
> +{
> +  char *err = NULL;
> +  json::value *jv = parse_utf8_string ("", &err);
> +  ASSERT_STREQ ("error at index 0: unexpected token: EOF", err);
> +  ASSERT_TRUE (jv == NULL);
> +  free (err);
> +}
> +
> +/* Verify that a missing comma between array elements is an error:
> +   NULL is returned, and the error message gives the index of the
> +   unexpected number token (index 6, the '2', in the input below).  */
> +
> +static void
> +test_error_missing_comma ()
> +{
> +  char *err = NULL;
> +  /*                  01234567.  */
> +  const char *json = "[0, 1 2]";
> +  json::value *jv = parse_utf8_string (json, &err);
> +  ASSERT_STREQ ("error at index 6: expected ']'; got number",
> +             err);
> +  // FIXME: unittest the lexer?
> +  ASSERT_TRUE (jv == NULL);
> +  free (err);
> +}
> +
> +/* Run all of the selftests within this file.
> +   This is the entry point called from selftest::run_tests.  */
> +
> +void
> +json_cc_tests ()
> +{
> +  test_parse_string ();
> +  test_parse_number ();
> +  test_parse_array ();
> +  test_parse_object ();
> +  test_parse_literals ();
> +  test_parse_jsonrpc ();
> +  test_parse_empty_object ();
> +  test_error_empty_string ();
> +  test_error_missing_comma ();
> +
> +  /* FIXME: tests for roundtripping (noting that we don't preserve
> +     object key ordering).  */
> +
> +  /* FIXME: cloning.  */
> +}
> +
> +} // namespace selftest
> +
> +#endif /* #if CHECKING_P */
> diff --git a/gcc/json.h b/gcc/json.h
> new file mode 100644
> index 0000000..aedf84a
> --- /dev/null
> +++ b/gcc/json.h
> @@ -0,0 +1,214 @@
> +/* JSON parsing
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#ifndef GCC_JSON_H
> +#define GCC_JSON_H
> +
> +/* Implementation of JSON, a lightweight data-interchange format.
> +
> +   See http://www.json.org/
> +   and
> http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
> +   and https://tools.ietf.org/html/rfc7159
> +
> +   Supports parsing text into a DOM-like tree of json::value *, dumping
> +   json::value * to text.  */
> +
> +namespace json
> +{
> +
> +/* Forward decls of json::value and its subclasses (using indentation
> +   to denote inheritance).  */
> +
> +class value;
> +  class object;
> +  class array;
> +  class number;
> +  class string;
> +  class literal;
> +
> +/* An enum for discriminating the subclasses of json::value; this is
> +   what value::get_kind returns.  */
> +
> +enum kind
> +{
> +  /* class json::object.  */
> +  JSON_OBJECT,
> +
> +  /* class json::array.  */
> +  JSON_ARRAY,
> +
> +  /* class json::number.  */
> +  JSON_NUMBER,
> +
> +  /* class json::string.  */
> +  JSON_STRING,
> +
> +  /* class json::literal uses these three values to identify the
> +     particular literal.  */
> +  JSON_TRUE,
> +  JSON_FALSE,
> +  JSON_NULL
> +};
> +
> +/* Abstract base class of JSON values.  get_kind, print and clone are
> +   pure virtual, and are implemented by each subclass.  */
> +
> +class value
> +{
> + public:
> +  virtual ~value () {}
> +  virtual enum kind get_kind () const = 0;
> +  virtual void print (pretty_printer *pp) const = 0;
> +
> +  /* Create a deep copy of the value, returning a value which must be
> +     deleted by the caller.  */
> +  virtual value *clone () const = 0;
> +
> +  char *to_str () const;
> +  void dump (FILE *) const;
> +
> +  /* Methods for dynamically casting a value to one of the subclasses,
> +     returning NULL if the value is of the wrong kind.  */
> +  const object *as_object () const;
> +  const array *as_array () const;
> +  const number *as_number () const;
> +  const string *as_string () const;
> +
> +  /* Convenience accessors for attempting to perform key/value lookups
> +     on this value as if it were a json::object.
> +
> +     On success, return true and write the value to OUT_VALUE.
> +     On failure, return false and write an error message to OUT_ERR
> +     (which must be freed by the caller).  */
> +  bool get_value_by_key (const char *name, const value *&out_value,
> +                      char *&out_err) const;
> +  bool get_int_by_key (const char *name, int &out_value, char *&out_err)
> const;
> +  bool get_string_by_key (const char *name, const char *&out_value,
> +                       char *&out_err) const;
> +  bool get_array_by_key (const char *name, const array *&out_value,
> +                      char *&out_err) const;
> +
> +  /* As above, but the key is optional.  THIS must still be an object,
> +     though.  */
> +  bool get_optional_value_by_key (const char *name, const value
> *&out_value,
> +                               char *&out_err) const;
> +  bool get_optional_string_by_key (const char *name, const char
> *&out_value,
> +                                char *&out_err) const;
> +};
> +
> +/* Subclass of value for objects: an unordered collection of
> +   key/value pairs, held in a hash_map from "char *" keys to
> +   "value *".
> +   NOTE(review): the out-of-line destructor presumably releases the
> +   keys and deletes the values -- confirm against json.cc.  */
> +
> +class object : public value
> +{
> + public:
> +  ~object ();
> +
> +  enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; }
> +  void print (pretty_printer *pp) const FINAL OVERRIDE;
> +  value *clone () const FINAL OVERRIDE;
> +
> +  value *get (const char *key) const;
> +  value *get_if_nonnull (const char *key) const;
> +
> +  void set (const char *key, value *v);
> +
> + private:
> +  typedef hash_map <char *, value *,
> +    simple_hashmap_traits<nofree_string_hash, value *> > map_t;
> +  map_t m_map;
> +};
> +
> +/* Subclass of value for arrays.  Elements are held as "value *" in an
> +   auto_vec; append pushes onto the end, and get indexes directly into
> +   the vec without any checking of its own.  */
> +
> +class array : public value
> +{
> + public:
> +  ~array ();
> +
> +  enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; }
> +  void print (pretty_printer *pp) const FINAL OVERRIDE;
> +  value *clone () const FINAL OVERRIDE;
> +
> +  unsigned get_length () const { return m_elements.length (); }
> +  value *get (int idx) const { return m_elements[idx]; }
> +  void append (value *v) { m_elements.safe_push (v); }
> +
> + private:
> +  auto_vec<value *> m_elements;
> +};
> +
> +/* Subclass of value for numbers.  The value is stored as a double.  */
> +
> +class number : public value
> +{
> + public:
> +  number (double value) : m_value (value) {}
> +
> +  enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; }
> +  void print (pretty_printer *pp) const FINAL OVERRIDE;
> +  value *clone () const FINAL OVERRIDE;
> +
> +  double get () const { return m_value; }
> +
> + private:
> +  double m_value;
> +};
> +
> +/* Subclass of value for strings.  The ctor takes a copy of UTF8 (via
> +   xstrdup), which the dtor frees; get_string returns a pointer to
> +   that copy, which remains owned by the string.  */
> +
> +class string : public value
> +{
> + public:
> +  string (const char *utf8) : m_utf8 (xstrdup (utf8)) {}
> +  ~string () { free (m_utf8); }
> +
> +  enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; }
> +  void print (pretty_printer *pp) const FINAL OVERRIDE;
> +  value *clone () const FINAL OVERRIDE;
> +
> +  const char *get_string () const { return m_utf8; }
> +
> + private:
> +  char *m_utf8;
> +};
> +
> +/* Subclass of value for the three JSON literals "true", "false",
> +   and "null".  The kind passed to the ctor (JSON_TRUE, JSON_FALSE or
> +   JSON_NULL) is what get_kind returns.  */
> +
> +class literal : public value
> +{
> + public:
> +  literal (enum kind kind) : m_kind (kind) {}
> +
> +  enum kind get_kind () const FINAL OVERRIDE { return m_kind; }
> +  void print (pretty_printer *pp) const FINAL OVERRIDE;
> +  value *clone () const FINAL OVERRIDE;
> +
> + private:
> +  enum kind m_kind;
> +};
> +
> +/* Declarations for parsing JSON to a json::value * tree.
> +   The first takes a buffer with an explicit LENGTH; the second takes
> +   a nil-terminated buffer.  Each returns NULL if there was a problem,
> +   writing an error message to *ERR_OUT, which must be freed by the
> +   caller.  */
> +
> +extern value *parse_utf8_string (size_t length, const char *utf8_buf,
> +                              char **err_out);
> +extern value *parse_utf8_string (const char *utf8, char **err_out);
> +
> +} // namespace json
> +
> +#endif  /* GCC_JSON_H  */
> diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
> index fe221ff..36879cf 100644
> --- a/gcc/selftest-run-tests.c
> +++ b/gcc/selftest-run-tests.c
> @@ -70,6 +70,7 @@ selftest::run_tests ()
>    fibonacci_heap_c_tests ();
>    typed_splay_tree_c_tests ();
>    unique_ptr_tests_cc_tests ();
> +  json_cc_tests ();
>
>    /* Mid-level data structures.  */
>    input_c_tests ();
> diff --git a/gcc/selftest.h b/gcc/selftest.h
> index e3117c6..2a912d8 100644
> --- a/gcc/selftest.h
> +++ b/gcc/selftest.h
> @@ -199,6 +199,7 @@ extern void ggc_tests_c_tests ();
>  extern void hash_map_tests_c_tests ();
>  extern void hash_set_tests_c_tests ();
>  extern void input_c_tests ();
> +extern void json_cc_tests ();
>  extern void pretty_print_c_tests ();
>  extern void read_rtl_function_c_tests ();
>  extern void rtl_tests_c_tests ();
> --
> 1.8.5.3
>
>

Re: [PATCH 02/10] Add JSON implementation

Reply via email to