hans created this revision.
hans added reviewers: thakis, rnk, rsmith.

See 
https://docs.google.com/document/d/1xMkTZMKx9llnMPgso0jrx3ankI4cv60xeZ0y4ksf4wc/preview
 for background discussion.

This adds a warning, flags, and pragmas to limit the number of preprocessor 
tokens, either at a certain point in a translation unit or overall.

The idea is that this would allow projects to limit the size of certain widely 
included headers, or of translation units overall, as a way to insert 
backstops against header bloat and prevent compile-time regressions.

What do you think?


https://reviews.llvm.org/D72703

Files:
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticParseKinds.td
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Parse/Parser.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Lex/Preprocessor.cpp
  clang/lib/Parse/ParsePragma.cpp
  clang/lib/Parse/Parser.cpp
  clang/test/Driver/autocomplete.c
  clang/test/Parser/max-tokens.cpp

Index: clang/test/Parser/max-tokens.cpp
===================================================================
--- /dev/null
+++ clang/test/Parser/max-tokens.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -DMAX_TOKENS          -fmax-tokens 2
+// RUN: %clang_cc1 -fsyntax-only -verify %s -DMAX_TOKENS_OVERRIDE -fmax-tokens 9
+
+int x, y, z;
+
+#pragma clang max_tokens         // expected-error  {{missing argument to '#pragma clang max_tokens'; expected integer}}
+#pragma clang max_tokens foo     // expected-error  {{expected an integer argument in '#pragma clang max_tokens'}}
+#pragma clang max_tokens 123 456 // expected-warning{{extra tokens at end of '#pragma clang max_tokens' - ignored}}
+
+#pragma clang max_tokens 1 // expected-warning{{the number of preprocessor source tokens (7) exceeds this token limit (1)}}
+
+
+#pragma clang max_file_tokens // expected-error{{missing argument to '#pragma clang max_file_tokens'; expected integer}}
+#pragma clang max_file_tokens foo // expected-error{{expected an integer argument in '#pragma clang max_file_tokens'}}
+#pragma clang max_file_tokens 123 456 // expected-warning{{extra tokens at end of '#pragma clang max_file_tokens' - ignored}}
+
+#ifdef MAX_TOKENS_OVERRIDE
+#pragma clang max_file_tokens 3 // expected-warning@+4{{the number of preprocessor source tokens in this file (8) exceeds the token limit (3)}}
+                                // expected-note@-1{{file token limit set here}}
+#elif MAX_TOKENS
+// expected-warning@+1{{the number of preprocessor source tokens in this file (8) exceeds the token limit (2)}}
+#endif
Index: clang/test/Driver/autocomplete.c
===================================================================
--- clang/test/Driver/autocomplete.c
+++ clang/test/Driver/autocomplete.c
@@ -99,6 +99,7 @@
 // WARNING-NEXT: -Wmain-return-type
 // WARNING-NEXT: -Wmalformed-warning-check
 // WARNING-NEXT: -Wmany-braces-around-scalar-init
+// WARNING-NEXT: -Wmax-tokens
 // WARNING-NEXT: -Wmax-unsigned-zero
 // RUN: %clang --autocomplete=-Wno-invalid-pp- | FileCheck %s -check-prefix=NOWARNING
 // NOWARNING: -Wno-invalid-pp-token
Index: clang/lib/Parse/Parser.cpp
===================================================================
--- clang/lib/Parse/Parser.cpp
+++ clang/lib/Parse/Parser.cpp
@@ -650,6 +650,16 @@
     return false;
 
   case tok::eof:
+    // Check whether the -fmax-tokens / max_file_tokens limit was exceeded.
+    if (PP.getTokenCount() > PP.getMaxTokens()) {
+      PP.Diag(Tok.getLocation(), diag::warn_max_tokens_file)
+          << PP.getTokenCount() << PP.getMaxTokens();
+      SourceLocation OverrideLoc = PP.getMaxTokensOverrideLoc();
+      if (OverrideLoc.isValid()) {
+        PP.Diag(OverrideLoc, diag::note_max_tokens_file_override);
+      }
+    }
+
     // Late template parsing can begin.
     if (getLangOpts().DelayedTemplateParsing)
       Actions.SetLateTemplateParser(LateTemplateParserCallback,
Index: clang/lib/Parse/ParsePragma.cpp
===================================================================
--- clang/lib/Parse/ParsePragma.cpp
+++ clang/lib/Parse/ParsePragma.cpp
@@ -262,6 +262,18 @@
   ParsedAttributes AttributesForPragmaAttribute;
 };
 
+struct PragmaMaxTokensHandler : public PragmaHandler {
+  PragmaMaxTokensHandler() : PragmaHandler("max_tokens") {}
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &FirstToken) override;
+};
+
+struct PragmaMaxFileTokensHandler : public PragmaHandler {
+  PragmaMaxFileTokensHandler() : PragmaHandler("max_file_tokens") {}
+  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+                    Token &FirstToken) override;
+};
+
 }  // end namespace
 
 void Parser::initializePragmaHandlers() {
@@ -382,6 +394,12 @@
   AttributePragmaHandler =
       std::make_unique<PragmaAttributeHandler>(AttrFactory);
   PP.AddPragmaHandler("clang", AttributePragmaHandler.get());
+
+  MaxTokensPragmaHandler = std::make_unique<PragmaMaxTokensHandler>();
+  PP.AddPragmaHandler("clang", MaxTokensPragmaHandler.get());
+
+  MaxFileTokensPragmaHandler = std::make_unique<PragmaMaxFileTokensHandler>();
+  PP.AddPragmaHandler("clang", MaxFileTokensPragmaHandler.get());
 }
 
 void Parser::resetPragmaHandlers() {
@@ -487,6 +505,12 @@
 
   PP.RemovePragmaHandler("clang", AttributePragmaHandler.get());
   AttributePragmaHandler.reset();
+
+  PP.RemovePragmaHandler("clang", MaxTokensPragmaHandler.get());
+  MaxTokensPragmaHandler.reset();
+
+  PP.RemovePragmaHandler("clang", MaxFileTokensPragmaHandler.get());
+  MaxFileTokensPragmaHandler.reset();
 }
 
 /// Handle the annotation token produced for #pragma unused(...)
@@ -3279,3 +3303,64 @@
   PP.EnterTokenStream(std::move(TokenArray), 1,
                       /*DisableMacroExpansion=*/false, /*IsReinject=*/false);
 }
+
+// Handle '#pragma clang max_tokens 12345'.
+void PragmaMaxTokensHandler::HandlePragma(Preprocessor &PP,
+                                          PragmaIntroducer Introducer,
+                                          Token &Tok) {
+  PP.Lex(Tok);
+  if (Tok.is(tok::eod)) {
+    PP.Diag(Tok.getLocation(), diag::err_pragma_missing_argument)
+        << "clang max_tokens" << /*Expected=*/true << "integer";
+    return;
+  }
+
+  SourceLocation Loc = Tok.getLocation();
+  uint64_t MaxTokens;
+  if (Tok.isNot(tok::numeric_constant) ||
+      !PP.parseSimpleIntegerLiteral(Tok, MaxTokens)) {
+    PP.Diag(Tok.getLocation(), diag::err_pragma_expected_integer)
+        << "clang max_tokens";
+    return;
+  }
+
+  if (Tok.isNot(tok::eod)) {
+    PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
+        << "clang max_tokens";
+    return;
+  }
+
+  if (PP.getTokenCount() > MaxTokens) {
+    PP.Diag(Loc, diag::warn_max_tokens)
+        << PP.getTokenCount() << (unsigned)MaxTokens;
+  }
+}
+
+// Handle '#pragma clang max_file_tokens 12345'.
+void PragmaMaxFileTokensHandler::HandlePragma(Preprocessor &PP,
+                                              PragmaIntroducer Introducer,
+                                              Token &Tok) {
+  PP.Lex(Tok);
+  if (Tok.is(tok::eod)) {
+    PP.Diag(Tok.getLocation(), diag::err_pragma_missing_argument)
+        << "clang max_file_tokens" << /*Expected=*/true << "integer";
+    return;
+  }
+
+  SourceLocation Loc = Tok.getLocation();
+  uint64_t MaxTokens;
+  if (Tok.isNot(tok::numeric_constant) ||
+      !PP.parseSimpleIntegerLiteral(Tok, MaxTokens)) {
+    PP.Diag(Tok.getLocation(), diag::err_pragma_expected_integer)
+        << "clang max_file_tokens";
+    return;
+  }
+
+  if (Tok.isNot(tok::eod)) {
+    PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
+        << "clang max_file_tokens";
+    return;
+  }
+
+  PP.overrideMaxTokens(MaxTokens, Loc);
+}
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -166,6 +166,10 @@
       this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
   if (ExcludedConditionalDirectiveSkipMappings)
     ExcludedConditionalDirectiveSkipMappings->clear();
+
+  if (LangOpts.MaxTokens) {
+    MaxTokens = LangOpts.MaxTokens;
+  }
 }
 
 Preprocessor::~Preprocessor() {
@@ -959,8 +963,12 @@
 
   LastTokenWasAt = Result.is(tok::at);
   --LexLevel;
-  if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected))
-    OnToken(Result);
+
+  if (LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) {
+    ++TokenCount;
+    if (OnToken)
+      OnToken(Result);
+  }
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3294,6 +3294,8 @@
 
   Opts.CompleteMemberPointers = Args.hasArg(OPT_fcomplete_member_pointers);
   Opts.BuildingPCHWithObjectFile = Args.hasArg(OPT_building_pch_with_obj);
+
+  Opts.MaxTokens = getLastArgIntValue(Args, OPT_fmax_tokens, 0, Diags);
 }
 
 static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5629,6 +5629,8 @@
   Args.AddLastArg(CmdArgs, options::OPT_dM);
   Args.AddLastArg(CmdArgs, options::OPT_dD);
 
+  Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens);
+
   // Handle serialized diagnostics.
   if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
     CmdArgs.push_back("-serialize-diagnostic-file");
Index: clang/include/clang/Parse/Parser.h
===================================================================
--- clang/include/clang/Parse/Parser.h
+++ clang/include/clang/Parse/Parser.h
@@ -201,6 +201,8 @@
   std::unique_ptr<PragmaHandler> STDCCXLIMITHandler;
   std::unique_ptr<PragmaHandler> STDCUnknownHandler;
   std::unique_ptr<PragmaHandler> AttributePragmaHandler;
+  std::unique_ptr<PragmaHandler> MaxTokensPragmaHandler;
+  std::unique_ptr<PragmaHandler> MaxFileTokensPragmaHandler;
 
   std::unique_ptr<CommentHandler> CommentSemaHandler;
 
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -416,6 +416,14 @@
   /// of phase 4 of translation or for some other situation.
   unsigned LexLevel = 0;
 
+  /// The number of (LexLevel 0) preprocessor tokens.
+  unsigned TokenCount = 0;
+
+  /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
+  /// warning; defaults to UINT_MAX, i.e. effectively unlimited.
+  unsigned MaxTokens = UINT_MAX;
+  SourceLocation MaxTokensOverrideLoc;
+
 public:
   struct PreambleSkipInfo {
     SourceLocation HashTokenLoc;
@@ -1010,6 +1018,19 @@
   }
   /// \}
 
+  /// Get the number of tokens processed so far.
+  unsigned getTokenCount() const { return TokenCount; }
+
+  /// Get the max number of tokens before issuing a -Wmax-tokens warning.
+  unsigned getMaxTokens() const { return MaxTokens; }
+
+  void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
+    MaxTokens = Value;
+    MaxTokensOverrideLoc = Loc;
+  };
+
+  SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
+
   /// Register a function that would be called on each token in the final
   /// expanded token stream.
   /// This also reports annotation tokens produced by the parser.
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -646,6 +646,9 @@
 def interface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>;
 def exported__symbols__list : Separate<["-"], "exported_symbols_list">;
 def e : JoinedOrSeparate<["-"], "e">, Group<Link_Group>;
+def fmax_tokens : Separate<["-"], "fmax-tokens">,
+  HelpText<"Max number of preprocessed tokens for -Wmax-tokens.">,
+  Group<f_Group>, Flags<[CC1Option]>;
 def fPIC : Flag<["-"], "fPIC">, Group<f_Group>;
 def fno_PIC : Flag<["-"], "fno-PIC">, Group<f_Group>;
 def fPIE : Flag<["-"], "fPIE">, Group<f_Group>;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -344,6 +344,8 @@
 
 LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors")
 
+COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0")
+
 #undef LANGOPT
 #undef COMPATIBLE_LANGOPT
 #undef BENIGN_LANGOPT
Index: clang/include/clang/Basic/DiagnosticParseKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticParseKinds.td
+++ clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1014,6 +1014,8 @@
 def warn_pragma_expected_init_seg : Warning<
   "expected 'compiler', 'lib', 'user', or a string literal for the section name in '#pragma %0' - ignored">,
   InGroup<IgnoredPragmas>;
+
+def err_pragma_expected_integer : Error<"expected an integer argument in '#pragma %0'">;
 def warn_pragma_expected_integer : Warning<
   "expected integer between %0 and %1 inclusive in '#pragma %2' - ignored">,
   InGroup<IgnoredPragmas>;
@@ -1341,4 +1343,14 @@
   InGroup<DiagGroup<"concepts-ts-compat">>;
 }
 
+def warn_max_tokens : Warning<
+  "the number of preprocessor source tokens (%0) exceeds this token limit (%1)">,
+  InGroup<MaxTokens>;
+
+def warn_max_tokens_file : Warning<
+  "the number of preprocessor source tokens in this file (%0) exceeds the token limit (%1)">,
+  InGroup<MaxTokens>;
+
+def note_max_tokens_file_override : Note<"file token limit set here">;
+
 } // end of Parser diagnostics
Index: clang/include/clang/Basic/DiagnosticGroups.td
===================================================================
--- clang/include/clang/Basic/DiagnosticGroups.td
+++ clang/include/clang/Basic/DiagnosticGroups.td
@@ -1149,3 +1149,33 @@
 def CTADMaybeUnsupported : DiagGroup<"ctad-maybe-unsupported">;
 
 def FortifySource : DiagGroup<"fortify-source">;
+
+def MaxTokens : DiagGroup<"max-tokens"> {
+  code Documentation = [{
+The warning is issued if the number of pre-processor tokens exceeds
+the token limit, which can be set in three ways:
+
+1. As a limit at a specific point in a file, using the ``clang max_tokens``
+   pragma:
+
+   .. code-block:: c++
+
+      #pragma clang max_tokens 1234
+
+2. As a per-translation unit limit, using the ``-fmax-tokens`` command-line
+   flag:
+
+   .. code-block:: console
+
+      clang -c a.cpp -fmax-tokens 1234
+
+3. As a per-translation unit limit using the ``clang max_file_tokens`` pragma,
+   which works like and overrides the ``-fmax-tokens`` flag:
+
+   .. code-block:: c++
+
+      #pragma clang max_file_tokens 1234
+
+These limits can be helpful in limiting code growth through included files.
+}];
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to