ilya-biryukov updated this revision to Diff 193901.
ilya-biryukov added a comment.

Pass an enum indicating where the token comes from.
The enum is ad hoc at the moment and will require some thought to turn
it into a reasonable abstraction.

The consumer of the token stream actually needs to be able to distinguish
tokens which are part of the final expanded token stream from those that
aren't (macro directives, intermediate macro argument expansions, etc.).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59885/new/

https://reviews.llvm.org/D59885

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Lex/TokenLexer.h
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Lex/PPMacroExpansion.cpp
  clang/lib/Lex/Preprocessor.cpp

Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -864,20 +864,32 @@
 void Preprocessor::Lex(Token &Result) {
   // We loop here until a lex function returns a token; this avoids recursion.
   bool ReturnedToken;
+  TokenSource Source;
   do {
     switch (CurLexerKind) {
     case CLK_Lexer:
+      if (CurLexer->ParsingPreprocessorDirective)
+        Source = TokenSource::MacroDirective;
+      else if (DisableMacroExpansion)
+        Source = TokenSource::MacroNameOrArg;
+      else
+        Source = TokenSource::File;
+
       ReturnedToken = CurLexer->Lex(Result);
       break;
     case CLK_TokenLexer:
+      Source = CurTokenLexer->isMacroExpansion() ? TokenSource::MacroExpansion
+                                                 : TokenSource::Precached;
       ReturnedToken = CurTokenLexer->Lex(Result);
       break;
     case CLK_CachingLexer:
       CachingLex(Result);
+      Source = TokenSource::Precached;
       ReturnedToken = true;
       break;
     case CLK_LexAfterModuleImport:
       LexAfterModuleImport(Result);
+      Source = TokenSource::AfterModuleImport;
       ReturnedToken = true;
       break;
     }
@@ -893,6 +905,8 @@
   }
 
   LastTokenWasAt = Result.is(tok::at);
+  if (OnToken)
+    OnToken(Result, Source);
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if
Index: clang/lib/Lex/PPMacroExpansion.cpp
===================================================================
--- clang/lib/Lex/PPMacroExpansion.cpp
+++ clang/lib/Lex/PPMacroExpansion.cpp
@@ -463,6 +463,10 @@
                                                  const MacroDefinition &M) {
   MacroInfo *MI = M.getMacroInfo();
 
+  // The macro-expanded identifiers are not seen by the Lex() method.
+  if (OnToken)
+    OnToken(Identifier, TokenSource::MacroNameOrArg);
+
   // If this is a macro expansion in the "#if !defined(x)" line for the file,
   // then the macro could expand to different things in other contexts, we need
   // to disable the optimization in this case.
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -400,6 +400,9 @@
       setCodeCompletionReached();
       continue;
     }
+    // This token is not seen by Lex(), so report it to the watcher here.
+    if (OnToken)
+      OnToken(Tok, TokenSource::SkippedPPBranch);
 
     // If this is the end of the buffer, we have an error.
     if (Tok.is(tok::eof)) {
@@ -865,6 +868,10 @@
   // Save the '#' token in case we need to return it later.
   Token SavedHash = Result;
 
+  // Lex() never sees the '#' token from directives, so report it here.
+  if (OnToken)
+    OnToken(Result, TokenSource::MacroDirective);
+
   // Read the next token, the directive flavor.  This isn't expanded due to
   // C99 6.10.3p8.
   LexUnexpandedToken(Result);
Index: clang/include/clang/Lex/TokenLexer.h
===================================================================
--- clang/include/clang/Lex/TokenLexer.h
+++ clang/include/clang/Lex/TokenLexer.h
@@ -147,6 +147,10 @@
   /// preprocessor directive.
   bool isParsingPreprocessorDirective() const;
 
+  /// Returns true iff the TokenLexer is expanding a macro and not replaying a
+  /// stream of tokens.
+  bool isMacroExpansion() const { return Macro != nullptr; }
+
 private:
   void destroy();
 
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -33,6 +33,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerUnion.h"
@@ -48,8 +49,8 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <map>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -114,6 +115,21 @@
   MU_Undef  = 2
 };
 
+/// Captures some information about where the tokens come from. Used by the
+/// callback that records tokens.
+enum class TokenSource {
+  File, // a token coming directly from a file that is not a macro directive,
+        // a macro name, etc.
+  MacroNameOrArg,  // a name or an argument of a macro expansion.
+  MacroExpansion,  // a token coming from a body of a macro expansion.
+  MacroDirective,  // a token from a macro directive body, i.e. '#' and all the
+                   // tokens till the end of the line.
+  SkippedPPBranch, // a token from a disabled #if or #ifdef branch.
+  Precached,       // a token from a previously saved token stream.
+  AfterModuleImport, // FIXME: look into this case more closely, describe what
+                     // it is.
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -124,6 +140,7 @@
   friend class VAOptDefinitionContext;
   friend class VariadicMacroScopeGuard;
 
+  llvm::unique_function<void(const clang::Token &, TokenSource)> OnToken;
   std::shared_ptr<PreprocessorOptions> PPOpts;
   DiagnosticsEngine        *Diags;
   LangOptions       &LangOpts;
@@ -911,6 +928,15 @@
   }
   /// \}
 
+  /// Register a function that would be called on each token seen by the
+  /// preprocessor. This is a very low-level hook: the produced token stream is
+  /// tied to the internals of the preprocessor, so interpreting the result of
+  /// the callback is hard.
+  void setTokenWatcher(
+      llvm::unique_function<void(const clang::Token &, TokenSource)> F) {
+    OnToken = std::move(F);
+  }
+
   bool isMacroDefined(StringRef Id) {
     return isMacroDefined(&Identifiers.get(Id));
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to