On Fri, Nov 11, 2022 at 10:23:10PM -0500, David Malcolm wrote: > Changes since v1: ported the doc changes from texinfo to sphinx > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. > > Are the C frontend parts OK for trunk? (I can self-approve the > analyzer parts)
Sorry for the delay. > The patch adds an interface for frontends to call into the analyzer as > the translation unit finishes. The analyzer can then call back into the > frontend to ask about the values of the named constants it cares about > whilst the frontend's data structures are still around. > > The patch implements this for the C frontend, which looks up the names > by looking for named CONST_DECLs (which handles enum values). Failing > that, it attempts to look up the values of macros but only the simplest > cases are supported (a non-traditional macro with a single CPP_NUMBER > token). It does this by building a buffer containing the macro > definition and rerunning a lexer on it. > > The analyzer gracefully handles the cases where named values aren't > found (such as anything more complicated than described above). > > The patch ports the analyzer to use this mechanism for "O_RDONLY", > "O_WRONLY", and "O_ACCMODE". I have successfully tested my socket patch > to also use this for "SOCK_STREAM" and "SOCK_DGRAM", so the technique > seems to work. So this works well for code like enum __socket_type { SOCK_STREAM = 1, #define SOCK_STREAM SOCK_STREAM }; ? > diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc > index d70697b1d63..efe19fbe70b 100644 > --- a/gcc/c/c-parser.cc > +++ b/gcc/c/c-parser.cc > @@ -72,6 +72,8 @@ along with GCC; see the file COPYING3. If not see > #include "memmodel.h" > #include "c-family/known-headers.h" > #include "bitmap.h" > +#include "analyzer/analyzer-language.h" > +#include "toplev.h" > > /* We need to walk over decls with incomplete struct/union/enum types > after parsing the whole translation unit. > @@ -1662,6 +1664,87 @@ static bool c_parser_objc_diagnose_bad_element_prefix > (c_parser *, struct c_declspecs *); > static location_t c_parser_parse_rtl_body (c_parser *, char *); > > +#if ENABLE_ANALYZER > + > +namespace ana { > + > +/* Concrete implementation of ana::translation_unit for the C frontend. 
*/ > + > +class c_translation_unit : public translation_unit > +{ > +public: > + /* Implementation of translation_unit::lookup_constant_by_id for use by the > + analyzer to look up named constants in the user's source code. */ > + tree lookup_constant_by_id (tree id) const final override > + { > + /* Consider decls. */ > + if (tree decl = lookup_name (id)) > + if (TREE_CODE (decl) == CONST_DECL) > + if (tree value = DECL_INITIAL (decl)) > + if (TREE_CODE (value) == INTEGER_CST) > + return value; > + > + /* Consider macros. */ > + cpp_hashnode *hashnode = C_CPP_HASHNODE (id); > + if (cpp_macro_p (hashnode)) > + if (tree value = consider_macro (hashnode->value.macro)) > + return value; > + > + return NULL_TREE; > + } > + > +private: > + /* Attempt to get an INTEGER_CST from MACRO. > + Only handle the simplest cases: where MACRO's definition is a single > + token containing a number, by lexing the number again. > + This will handle e.g. > + #define NAME 42 > + and other bases but not negative numbers, parentheses or e.g. > + #define NAME 1 << 7 > + as doing so would require a parser. */ > + tree consider_macro (cpp_macro *macro) const > + { > + if (macro->paramc > 0) > + return NULL_TREE; > + if (macro->kind == cmk_traditional) Do you really want to handle cmk_assert here? I'd say you want: if (macro->kind != cmk_macro) > + return NULL_TREE; > + if (macro->count != 1) > + return NULL_TREE; > + const cpp_token &tok = macro->exp.tokens[0]; > + if (tok.type != CPP_NUMBER) > + return NULL_TREE; > + > + cpp_reader *old_parse_in = parse_in; > + parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89, > + ident_hash, line_table); Why not always CLK_GNUC89 since we're in the C FE? > + > + pretty_printer pp; > + pp_string (&pp, (const char *)tok.val.str.text); Missing a space after ')' in the cast. > + pp_newline (&pp); > + cpp_push_buffer (parse_in, > + (const unsigned char *)pp_formatted_text (&pp), Likewise, missing a space after ')'.
> + strlen (pp_formatted_text (&pp)), > + 0); > + > + tree value; > + location_t loc; > + unsigned char cpp_flags; > + c_lex_with_flags (&value, &loc, &cpp_flags, 0); > + > + cpp_destroy (parse_in); > + parse_in = old_parse_in; > + > + if (value && TREE_CODE (value) == INTEGER_CST) > + return value; > + > + return NULL_TREE; > + } > +}; > + > +} // namespace ana > + > +#endif /* #if ENABLE_ANALYZER */ > + > /* Parse a translation unit (C90 6.7, C99 6.9, C11 6.9). > > translation-unit: > @@ -1722,6 +1805,14 @@ c_parser_translation_unit (c_parser *parser) > "#pragma omp begin assumes", "#pragma omp end assumes"); > current_omp_begin_assumes = 0; > } > + > +#if ENABLE_ANALYZER > + if (flag_analyzer) > + { > + ana::c_translation_unit tu; > + ana::on_finish_translation_unit (tu); > + } > +#endif > } Marek