This is an updated version of: https://gcc.gnu.org/ml/gcc-patches/2015-09/msg00736.html
Changes in v2 of the patch:
  * c_lex_with_flags: don't write through the range ptr if it's NULL
  * don't add any fields to the C++ frontend's cp_token for now
  * libcpp/lex.c: prevent usage of stale/uninitialized data in
    _cpp_temp_token and _cpp_lex_direct.

This patch adds source *range* information to libcpp's cpp_token, and
to c_token in the C frontend.

As noted before, to minimize churn, I kept the existing location_t
fields, though in theory these are always just equal to the start of
the source range.

cpplib.h's struct cpp_token had this comment:

  /* A preprocessing token.  This has been carefully packed and should
     occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */

which, like the v1 equivalent, this patch invalidates.

See the cover letter for this patch kit, which describes how we might
go back to using just a location_t, and stashing the range inside the
location_t.  I'm doing it this way for now to allow for more
flexibility as I benchmark and explore implementation options.

gcc/c-family/ChangeLog:
	* c-lex.c (c_lex_with_flags): Add "range" param, and write back
	to *range with the range of the libcpp token if non-NULL.
	* c-pragma.h (c_lex_with_flags): Add "range" param.

gcc/c/ChangeLog:
	* c-parser.c (struct c_token): Add "range" field.
	(c_lex_one_token): Write back to token->range in call to
	c_lex_with_flags.

gcc/cp/ChangeLog:
	* parser.c (cp_lexer_get_preprocessor_token): Update call to
	c_lex_with_flags to pass NULL for range ptr.

libcpp/ChangeLog:
	* include/cpplib.h (struct cpp_token): Add src_range field.
	* lex.c (_cpp_temp_token): Initialize the src_range of the new
	token from the src_loc of the old token.
	(_cpp_lex_direct): Set up the src_range on the token.
--- gcc/c-family/c-lex.c | 9 +++++++-- gcc/c-family/c-pragma.h | 4 ++-- gcc/c/c-parser.c | 6 +++++- gcc/cp/parser.c | 3 ++- libcpp/include/cpplib.h | 4 +++- libcpp/lex.c | 14 ++++++++++++++ 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c index 55ceb20..57a626e 100644 --- a/gcc/c-family/c-lex.c +++ b/gcc/c-family/c-lex.c @@ -380,11 +380,14 @@ c_common_has_attribute (cpp_reader *pfile) } /* Read a token and return its type. Fill *VALUE with its value, if - applicable. Fill *CPP_FLAGS with the token's flags, if it is + applicable. Fill *LOC with the source location of the token. + If non-NULL, fill *RANGE with the source range of the token. + Fill *CPP_FLAGS with the token's flags, if it is non-NULL. */ enum cpp_ttype -c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, +c_lex_with_flags (tree *value, location_t *loc, source_range *range, + unsigned char *cpp_flags, int lex_flags) { static bool no_more_pch; @@ -397,6 +400,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, retry: tok = cpp_get_token_with_location (parse_in, loc); type = tok->type; + if (range) + *range = tok->src_range; retry_after_at: switch (type) diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index f6e1090..3b94e44 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -225,8 +225,8 @@ extern enum cpp_ttype pragma_lex (tree *, location_t *loc = NULL); /* This is not actually available to pragma parsers. It's merely a convenient location to declare this function for c-lex, after having enum cpp_ttype declared. 
*/ -extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, unsigned char *, - int); +extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, source_range *, + unsigned char *, int); extern void c_pp_lookup_pragma (unsigned int, const char **, const char **); diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 2fab3f0..5edf563 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -170,6 +170,8 @@ struct GTY (()) c_token { ENUM_BITFIELD (pragma_kind) pragma_kind : 8; /* The location at which this token was found. */ location_t location; + /* The source range at which this token was found. */ + source_range range; /* The value associated with this token, if any. */ tree value; }; @@ -239,7 +241,9 @@ c_lex_one_token (c_parser *parser, c_token *token) { timevar_push (TV_LEX); - token->type = c_lex_with_flags (&token->value, &token->location, NULL, + token->type = c_lex_with_flags (&token->value, &token->location, + &token->range, + NULL, (parser->lex_untranslated_string ? C_LEX_STRING_NO_TRANSLATE : 0)); token->id_kind = C_ID_NONE; diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 0134189..9423755 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -764,7 +764,8 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token *token) /* Get a new token from the preprocessor. */ token->type - = c_lex_with_flags (&token->u.value, &token->location, &token->flags, + = c_lex_with_flags (&token->u.value, &token->location, + NULL, &token->flags, lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN); token->keyword = RID_MAX; token->pragma_kind = PRAGMA_NONE; diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index a2bdfa0..0b1a403 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -235,9 +235,11 @@ struct GTY(()) cpp_identifier { }; /* A preprocessing token. This has been carefully packed and should - occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */ + occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. 
+ FIXME: the above comment is no longer true with this patch. */ struct GTY(()) cpp_token { source_location src_loc; /* Location of first char of token. */ + source_range src_range; /* Source range covered by the token. */ ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */ unsigned short flags; /* flags - see above */ diff --git a/libcpp/lex.c b/libcpp/lex.c index 0aa1090..a6f16b2 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -2169,6 +2169,8 @@ _cpp_temp_token (cpp_reader *pfile) result = pfile->cur_token++; result->src_loc = old->src_loc; + result->src_range.m_start = old->src_loc; + result->src_range.m_finish = old->src_loc; return result; } @@ -2365,6 +2367,13 @@ _cpp_lex_direct (cpp_reader *pfile) result->src_loc = linemap_position_for_column (pfile->line_table, CPP_BUF_COLUMN (buffer, buffer->cur)); + /* The token's src_range begins here. */ + result->src_range.m_start = result->src_loc; + + /* Ensure m_finish is also initialized, in case we bail out above + via a "goto fresh_line;" below. */ + result->src_range.m_finish = result->src_loc; + switch (c) { case ' ': case '\t': case '\f': case '\v': case '\0': @@ -2723,6 +2732,11 @@ _cpp_lex_direct (cpp_reader *pfile) break; } + /* The token's src_range ends here. */ + result->src_range.m_finish = + linemap_position_for_column (pfile->line_table, + CPP_BUF_COLUMN (buffer, buffer->cur)); + return result; } -- 1.8.5.3