This is an updated version of:
  https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00736.html

Changes in V2 of the patch:
  * c_lex_with_flags: don't write through the range ptr if it's NULL
  * don't add any fields to the C++ frontend's cp_token for now
  * libcpp/lex.c: prevent usage of stale/uninitialized data in
    _cpp_temp_token and _cpp_lex_direct.

This patch adds source *range* information to libcpp's cpp_token, and to
c_token in the C frontend.

As noted before, to minimize churn, I kept the existing location_t
fields, though in theory these are always just equal to the start of
the source range.

cpplib.h's struct cpp_token had this comment:

  /* A preprocessing token.  This has been carefully packed and should
     occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */

which, like the v1 equivalent, this patch invalidates.

See the cover-letter for this patch kit which describes how we might
go back to using just a location_t, and stashing the range inside the
location_t.  I'm doing it this way for now to allow for more
flexibility as I benchmark and explore implementation options.

gcc/c-family/ChangeLog:
        * c-lex.c (c_lex_with_flags): Add "range" param, and write back
        to *range with the range of the libcpp token if non-NULL.
        * c-pragma.h (c_lex_with_flags): Add "range" param.

gcc/c/ChangeLog:
        * c-parser.c (struct c_token): Add "range" field.
        (c_lex_one_token): Write back to token->range in call to
        c_lex_with_flags.

gcc/cp/ChangeLog:
        * parser.c (cp_lexer_get_preprocessor_token): Update call to
        c_lex_with_flags to pass NULL for range ptr.

libcpp/ChangeLog:
        * include/cpplib.h (struct cpp_token): Add src_range field.
        * lex.c (_cpp_lex_direct): Set up the src_range on the token.
---
 gcc/c-family/c-lex.c    |  9 +++++++--
 gcc/c-family/c-pragma.h |  4 ++--
 gcc/c/c-parser.c        |  6 +++++-
 gcc/cp/parser.c         |  3 ++-
 libcpp/include/cpplib.h |  4 +++-
 libcpp/lex.c            | 14 ++++++++++++++
 6 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index 55ceb20..57a626e 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -380,11 +380,14 @@ c_common_has_attribute (cpp_reader *pfile)
 }
 
 /* Read a token and return its type.  Fill *VALUE with its value, if
-   applicable.  Fill *CPP_FLAGS with the token's flags, if it is
+   applicable.  Fill *LOC with the source location of the token.
+   If non-NULL, fill *RANGE with the source range of the token.
+   Fill *CPP_FLAGS with the token's flags, if it is
    non-NULL.  */
 
 enum cpp_ttype
-c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
+c_lex_with_flags (tree *value, location_t *loc, source_range *range,
+                 unsigned char *cpp_flags,
                  int lex_flags)
 {
   static bool no_more_pch;
@@ -397,6 +400,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned 
char *cpp_flags,
  retry:
   tok = cpp_get_token_with_location (parse_in, loc);
   type = tok->type;
+  if (range)
+    *range = tok->src_range;
 
  retry_after_at:
   switch (type)
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index f6e1090..3b94e44 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -225,8 +225,8 @@ extern enum cpp_ttype pragma_lex (tree *, location_t *loc = 
NULL);
 /* This is not actually available to pragma parsers.  It's merely a
    convenient location to declare this function for c-lex, after
    having enum cpp_ttype declared.  */
-extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, unsigned char *,
-                                       int);
+extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, source_range *,
+                                       unsigned char *, int);
 
 extern void c_pp_lookup_pragma (unsigned int, const char **, const char **);
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 2fab3f0..5edf563 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -170,6 +170,8 @@ struct GTY (()) c_token {
   ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
   /* The location at which this token was found.  */
   location_t location;
+  /* The source range at which this token was found.  */
+  source_range range;
   /* The value associated with this token, if any.  */
   tree value;
 };
@@ -239,7 +241,9 @@ c_lex_one_token (c_parser *parser, c_token *token)
 {
   timevar_push (TV_LEX);
 
-  token->type = c_lex_with_flags (&token->value, &token->location, NULL,
+  token->type = c_lex_with_flags (&token->value, &token->location,
+                                 &token->range,
+                                 NULL,
                                  (parser->lex_untranslated_string
                                   ? C_LEX_STRING_NO_TRANSLATE : 0));
   token->id_kind = C_ID_NONE;
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 0134189..9423755 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -764,7 +764,8 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token 
*token)
 
    /* Get a new token from the preprocessor.  */
   token->type
-    = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
+    = c_lex_with_flags (&token->u.value, &token->location,
+                        NULL, &token->flags,
                        lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
   token->keyword = RID_MAX;
   token->pragma_kind = PRAGMA_NONE;
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index a2bdfa0..0b1a403 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -235,9 +235,11 @@ struct GTY(()) cpp_identifier {
 };
 
 /* A preprocessing token.  This has been carefully packed and should
-   occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */
+   occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.
+   FIXME: the above comment is no longer true with this patch.  */
 struct GTY(()) cpp_token {
   source_location src_loc;     /* Location of first char of token.  */
+  source_range src_range;      /* Source range covered by the token.  */
   ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */
   unsigned short flags;                /* flags - see above */
 
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 0aa1090..a6f16b2 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -2169,6 +2169,8 @@ _cpp_temp_token (cpp_reader *pfile)
 
   result = pfile->cur_token++;
   result->src_loc = old->src_loc;
+  result->src_range.m_start = old->src_loc;
+  result->src_range.m_finish = old->src_loc;
   return result;
 }
 
@@ -2365,6 +2367,13 @@ _cpp_lex_direct (cpp_reader *pfile)
     result->src_loc = linemap_position_for_column (pfile->line_table,
                                          CPP_BUF_COLUMN (buffer, buffer->cur));
 
+  /* The token's src_range begins here.  */
+  result->src_range.m_start = result->src_loc;
+
+  /* Ensure m_finish is also initialized, in case we bail out above
+     via a "goto fresh_line;" below.  */
+  result->src_range.m_finish = result->src_loc;
+
   switch (c)
     {
     case ' ': case '\t': case '\f': case '\v': case '\0':
@@ -2723,6 +2732,11 @@ _cpp_lex_direct (cpp_reader *pfile)
       break;
     }
 
+  /* The token's src_range ends here.  */
+  result->src_range.m_finish =
+    linemap_position_for_column (pfile->line_table,
+                                CPP_BUF_COLUMN (buffer, buffer->cur));
+
   return result;
 }
 
-- 
1.8.5.3

Reply via email to