akyrtzi created this revision.
Herald added a project: All.
akyrtzi requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Depends on D125487 <https://reviews.llvm.org/D125487>

This is 4/4 of a series of patches, bringing the following benefits:

- Full access to the preprocessor state during dependency scanning. E.g. a 
component can see what includes were taken and where they were located in the 
actual sources.
- Improved performance for dependency scanning. Measurements with a 
release+thin-LTO build show a ~11% reduction in wall time.
- Opportunity to use dependency scanning lexing to speed up skipping of 
excluded conditional blocks during normal preprocessing (as a follow-up, not 
part of this patch).

For normal preprocessing, measurements show that differences are below the 
noise level.

Since, after this change, we no longer minimize sources and pass the minimized 
versions in place of the real sources, `DependencyScanningFilesystem` is not 
technically necessary. However, it still has valuable performance benefits: it 
caches file `stat`s along with the results of scanning the sources. So the 
setup of using the `DependencyScanningFilesystem` during a dependency scan 
remains.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125488

Files:
  clang/include/clang/Lex/Lexer.h
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Lex/PreprocessorOptions.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Lex/PPLexerChange.cpp
  clang/lib/Lex/Preprocessor.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp

Index: clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -200,6 +200,17 @@
       // filesystem.
       FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
           ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS));
+
+      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
+          DepFS;
+      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
+          [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
+          -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+        if (llvm::ErrorOr<EntryRef> Entry =
+                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
+          return Entry->getDirectiveTokens();
+        return None;
+      };
     }
 
     // Create the dependency collector that will collect the produced
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -377,7 +377,9 @@
 
 void Preprocessor::recomputeCurLexerKind() {
   if (CurLexer)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = CurLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
   else if (CurTokenLexer)
     CurLexerKind = CLK_TokenLexer;
   else
@@ -640,6 +642,9 @@
     case CLK_CachingLexer:
       CachingLex(Tok);
       break;
+    case CLK_DependencyDirectivesLexer:
+      CurLexer->LexDependencyDirectiveToken(Tok);
+      break;
     case CLK_LexAfterModuleImport:
       LexAfterModuleImport(Tok);
       break;
@@ -901,6 +906,9 @@
       CachingLex(Result);
       ReturnedToken = true;
       break;
+    case CLK_DependencyDirectivesLexer:
+      ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result);
+      break;
     case CLK_LexAfterModuleImport:
       ReturnedToken = LexAfterModuleImport(Result);
       break;
Index: clang/lib/Lex/PPLexerChange.cpp
===================================================================
--- clang/lib/Lex/PPLexerChange.cpp
+++ clang/lib/Lex/PPLexerChange.cpp
@@ -91,8 +91,19 @@
         CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
   }
 
-  EnterSourceFileWithLexer(
-      new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir);
+  Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile);
+  if (getPreprocessorOpts().DependencyDirectivesForFile &&
+      FID != PredefinesFileID) {
+    if (Optional<FileEntryRef> File = SourceMgr.getFileEntryRefForID(FID)) {
+      if (Optional<ArrayRef<dependency_directives_scan::Directive>>
+              DepDirectives =
+                  getPreprocessorOpts().DependencyDirectivesForFile(*File)) {
+        TheLexer->DepDirectives = *DepDirectives;
+      }
+    }
+  }
+
+  EnterSourceFileWithLexer(TheLexer, CurDir);
   return false;
 }
 
@@ -110,7 +121,9 @@
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
   if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = TheLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
 
   // Notify the client, if desired, that we are in a new source file.
   if (Callbacks && !CurLexer->Is_PragmaLexer) {
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -426,29 +426,40 @@
   Token Tok;
   SourceLocation endLoc;
   while (true) {
-    CurLexer->Lex(Tok);
+    if (CurLexer->isDependencyDirectivesLexer()) {
+      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
+    } else {
+      while (true) {
+        CurLexer->Lex(Tok);
 
-    if (Tok.is(tok::code_completion)) {
-      setCodeCompletionReached();
-      if (CodeComplete)
-        CodeComplete->CodeCompleteInConditionalExclusion();
-      continue;
-    }
+        if (Tok.is(tok::code_completion)) {
+          setCodeCompletionReached();
+          if (CodeComplete)
+            CodeComplete->CodeCompleteInConditionalExclusion();
+          continue;
+        }
 
-    // If this is the end of the buffer, we have an error.
-    if (Tok.is(tok::eof)) {
-      // We don't emit errors for unterminated conditionals here,
-      // Lexer::LexEndOfFile can do that properly.
-      // Just return and let the caller lex after this #include.
-      if (PreambleConditionalStack.isRecording())
-        PreambleConditionalStack.SkipInfo.emplace(
-            HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
-      break;
-    }
+        // If this is the end of the buffer, we have an error.
+        if (Tok.is(tok::eof)) {
+          // We don't emit errors for unterminated conditionals here,
+          // Lexer::LexEndOfFile can do that properly.
+          // Just return and let the caller lex after this #include.
+          if (PreambleConditionalStack.isRecording())
+            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
+                                                      FoundNonSkipPortion,
+                                                      FoundElse, ElseLoc);
+          break;
+        }
 
-    // If this token is not a preprocessor directive, just skip it.
-    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
-      continue;
+        // If this token is not a preprocessor directive, just skip it.
+        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+          continue;
+
+        break;
+      }
+    }
+    if (Tok.is(tok::eof))
+      break;
 
     // We just parsed a # character at the start of a line, so we're in
     // directive mode.  Tell the lexer this so any newlines we see will be
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -2937,6 +2937,13 @@
 unsigned Lexer::isNextPPTokenLParen() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
+  if (isDependencyDirectivesLexer()) {
+    if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
+      return 2;
+    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+        tok::l_paren);
+  }
+
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
   // without emitting diagnostics, disables macro expansion, and will cause EOF
   // to return an EOF token instead of popping the include stack.
@@ -3279,6 +3286,8 @@
 }
 
 bool Lexer::Lex(Token &Result) {
+  assert(!isDependencyDirectivesLexer());
+
   // Start a new token.
   Result.startToken();
 
@@ -4100,3 +4109,129 @@
   // We parsed the directive; lex a token with the new state.
   return false;
 }
+
+const char *Lexer::convertDependencyDirectiveToken(
+    const dependency_directives_scan::Token &DDTok, Token &Result) {
+  const char *TokPtr = BufferStart + DDTok.Offset;
+  Result.startToken();
+  Result.setLocation(getSourceLocation(TokPtr));
+  Result.setKind(DDTok.Kind);
+  Result.setFlag((Token::TokenFlags)DDTok.Flags);
+  Result.setLength(DDTok.Length);
+  BufferPtr = TokPtr + DDTok.Length;
+  return TokPtr;
+}
+
+bool Lexer::LexDependencyDirectiveToken(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
+    if (DepDirectives.front().Kind == pp_eof)
+      return LexEndOfFile(Result, BufferEnd);
+    NextDepDirectiveTokenIndex = 0;
+    DepDirectives = DepDirectives.drop_front();
+  }
+
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
+
+  const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result);
+
+  if (Result.is(tok::hash) && Result.isAtStartOfLine()) {
+    PP->HandleDirective(Result);
+    return false;
+  }
+  if (Result.is(tok::raw_identifier)) {
+    Result.setRawIdentifierData(TokPtr);
+    if (!isLexingRawMode()) {
+      IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+      if (II->isHandleIdentifierCase())
+        return PP->HandleIdentifier(Result);
+    }
+    return true;
+  }
+  if (Result.isLiteral()) {
+    Result.setLiteralData(TokPtr);
+    return true;
+  }
+  if (Result.is(tok::colon) &&
+      (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) {
+    // Convert consecutive colons to 'tok::coloncolon'.
+    if (*BufferPtr == ':') {
+      assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+          tok::colon));
+      ++NextDepDirectiveTokenIndex;
+      Result.setKind(tok::coloncolon);
+    }
+    return true;
+  }
+  if (Result.is(tok::eod))
+    ParsingPreprocessorDirective = false;
+
+  return true;
+}
+
+bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  bool Stop = false;
+  unsigned NestedIfs = 0;
+  do {
+    DepDirectives = DepDirectives.drop_front();
+    switch (DepDirectives.front().Kind) {
+    case pp_none:
+      llvm_unreachable("unexpected 'pp_none'");
+    case pp_include:
+    case pp___include_macros:
+    case pp_define:
+    case pp_undef:
+    case pp_import:
+    case pp_pragma_import:
+    case pp_pragma_once:
+    case pp_pragma_push_macro:
+    case pp_pragma_pop_macro:
+    case pp_pragma_include_alias:
+    case pp_include_next:
+    case decl_at_import:
+    case cxx_module_decl:
+    case cxx_import_decl:
+    case cxx_export_module_decl:
+    case cxx_export_import_decl:
+      break;
+    case pp_if:
+    case pp_ifdef:
+    case pp_ifndef:
+      ++NestedIfs;
+      break;
+    case pp_elif:
+    case pp_elifdef:
+    case pp_elifndef:
+    case pp_else:
+      if (!NestedIfs) {
+        Stop = true;
+      }
+      break;
+    case pp_endif:
+      if (!NestedIfs) {
+        Stop = true;
+      } else {
+        --NestedIfs;
+      }
+      break;
+    case pp_eof:
+      return LexEndOfFile(Result, BufferEnd);
+    }
+  } while (!Stop);
+
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens.front();
+  assert(DDTok.is(tok::hash));
+  NextDepDirectiveTokenIndex = 1;
+
+  convertDependencyDirectiveToken(DDTok, Result);
+  return false;
+}
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -294,10 +294,6 @@
   /// Enable directives scanning of all files.
   void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); }
 
-private:
-  /// Check whether the file should be scanned for preprocessor directives.
-  bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
-
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
@@ -306,6 +302,10 @@
   getOrCreateFileSystemEntry(StringRef Filename,
                              bool DisableDirectivesScanning = false);
 
+private:
+  /// Check whether the file should be scanned for preprocessor directives.
+  bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
+
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
   /// shared cache indexed by unique ID, or creates new entry from scratch.
Index: clang/include/clang/Lex/PreprocessorOptions.h
===================================================================
--- clang/include/clang/Lex/PreprocessorOptions.h
+++ clang/include/clang/Lex/PreprocessorOptions.h
@@ -11,6 +11,7 @@
 
 #include "clang/Basic/BitmaskEnum.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include <functional>
@@ -199,6 +200,19 @@
   /// build it again.
   std::shared_ptr<FailedModulesSet> FailedModules;
 
+  /// Function for getting the dependency preprocessor directives of a file.
+  ///
+  /// These are directives derived from a special form of lexing where the
+  /// source input is scanned for the preprocessor directives that might have an
+  /// effect on the dependencies for a compilation unit.
+  ///
+  /// Enables a client to cache the directives for a file and provide them
+  /// across multiple compiler invocations.
+  /// FIXME: Allow returning an error.
+  std::function<Optional<ArrayRef<dependency_directives_scan::Directive>>(
+      FileEntryRef)>
+      DependencyDirectivesForFile;
+
   /// Set up preprocessor for RunAnalysis action.
   bool SetUpStaticAnalyzer = false;
 
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -557,6 +557,7 @@
     CLK_Lexer,
     CLK_TokenLexer,
     CLK_CachingLexer,
+    CLK_DependencyDirectivesLexer,
     CLK_LexAfterModuleImport
   } CurLexerKind = CLK_Lexer;
 
Index: clang/include/clang/Lex/Lexer.h
===================================================================
--- clang/include/clang/Lex/Lexer.h
+++ clang/include/clang/Lex/Lexer.h
@@ -16,6 +16,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "clang/Lex/PreprocessorLexer.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/Optional.h"
@@ -149,6 +150,13 @@
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
+  /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer().
+  ArrayRef<dependency_directives_scan::Directive> DepDirectives;
+
+  /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the
+  /// next token to use from the current dependency directive.
+  unsigned NextDepDirectiveTokenIndex = 0;
+
   void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
 
 public:
@@ -195,6 +203,23 @@
   /// return the tok::eof token.  This implicitly involves the preprocessor.
   bool Lex(Token &Result);
 
+  /// Called when the preprocessor is in 'dependency scanning lexing mode'.
+  bool LexDependencyDirectiveToken(Token &Result);
+
+  /// Called when the preprocessor is in 'dependency scanning lexing mode' and
+  /// is skipping a conditional block.
+  bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
+
+  /// True when the preprocessor is in 'dependency scanning lexing mode' and
+  /// created this \p Lexer for lexing a set of dependency directive tokens.
+  bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
+
+  /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to
+  /// the position just after the token.
+  /// \returns the buffer pointer at the beginning of the token.
+  const char *convertDependencyDirectiveToken(
+      const dependency_directives_scan::Token &DDTok, Token &Result);
+
 public:
   /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
   bool isPragmaLexer() const { return Is_PragmaLexer; }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to