kbobyrev updated this revision to Diff 389183.
kbobyrev marked 2 inline comments as done.
kbobyrev added a comment.

Resolve most comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114370/new/

https://reviews.llvm.org/D114370

Files:
  clang-tools-extra/clangd/IncludeCleaner.cpp
  clang-tools-extra/clangd/IncludeCleaner.h
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/SourceCode.h
  clang-tools-extra/clangd/index/SymbolCollector.cpp
  clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Index: clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -281,7 +281,8 @@
   auto &SM = AST.getSourceManager();
   auto &Includes = AST.getIncludeStructure();
 
-  auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM);
+  auto ReferencedFiles =
+      findReferencedFiles(findReferencedLocations(AST), AST.getPreprocessor());
   llvm::StringSet<> ReferencedFileNames;
   for (FileID FID : ReferencedFiles)
     ReferencedFileNames.insert(
@@ -303,6 +304,52 @@
   EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty());
 }
 
+TEST(IncludeCleaner, NonSelfContainedHeaders) {
+  TestTU TU;
+  TU.Code = R"cpp(
+    #include "bar.h"
+    #include "foo.h"
+
+    int LocalFoo = foo::Variable,
+        LocalBar = bar::Variable;
+    )cpp";
+  TU.AdditionalFiles["bar.h"] = R"cpp(
+    #pragma once
+    namespace bar {
+    #include "indirection.h"
+    }
+    )cpp";
+  TU.AdditionalFiles["foo.h"] = R"cpp(
+    #pragma once
+    namespace foo {
+    #include "unguarded.h"
+    }
+    )cpp";
+  TU.AdditionalFiles["indirection.h"] = R"cpp(
+    #include "unguarded.h"
+    )cpp";
+  TU.AdditionalFiles["unguarded.h"] = R"cpp(
+    constexpr int Variable = 42;
+    )cpp";
+
+  ParsedAST AST = TU.build();
+
+  auto ReferencedFiles =
+      findReferencedFiles(findReferencedLocations(AST), AST.getPreprocessor());
+  llvm::StringSet<> ReferencedFileNames;
+  auto &SM = AST.getSourceManager();
+  for (FileID FID : ReferencedFiles)
+    ReferencedFileNames.insert(
+        SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
+  // Note that we have uplifted the referenced files from non self-contained
+  // headers to header-guarded ones.
+  for (const auto &Key : ReferencedFileNames.keys()) {
+    llvm::outs() << "Key: " << Key << '\n';
+  }
+  EXPECT_THAT(ReferencedFileNames.keys(),
+              UnorderedElementsAre(testPath("bar.h"), testPath("foo.h")));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/SymbolCollector.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -266,7 +266,7 @@
         return toURI(Canonical);
       }
     }
-    if (!isSelfContainedHeader(FID, FE)) {
+    if (!isSelfContainedHeader(*PP, FID, FE, /*ExpensiveCheck=*/true)) {
       // A .inc or .def file is often included into a real header to define
       // symbols (e.g. LLVM tablegen files).
       if (Filename.endswith(".inc") || Filename.endswith(".def"))
@@ -279,20 +279,6 @@
     return toURI(FE);
   }
 
-  bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
-    // FIXME: Should files that have been #import'd be considered
-    // self-contained? That's really a property of the includer,
-    // not of the file.
-    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
-        !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
-      return false;
-    // This pattern indicates that a header can't be used without
-    // particular preprocessor state, usually set up by another header.
-    if (isDontIncludeMeHeader(SM.getBufferData(FID)))
-      return false;
-    return true;
-  }
-
   // Is Line an #if or #ifdef directive?
   static bool isIf(llvm::StringRef Line) {
     Line = Line.ltrim();
Index: clang-tools-extra/clangd/SourceCode.h
===================================================================
--- clang-tools-extra/clangd/SourceCode.h
+++ clang-tools-extra/clangd/SourceCode.h
@@ -324,6 +324,13 @@
 /// Returns true if the given location is in a generated protobuf file.
 bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
 
+/// Use heuristics to check whether the header is self-contained (has header
+/// guard, does not rely on preprocessor state).
+/// When \p ExpensiveCheck is true, this will read a portion of the file which
+/// be discouraged in performance-sensitive context.
+bool isSelfContainedHeader(const Preprocessor &PP, FileID FID,
+                           const FileEntry *FE, bool ExpensiveCheck = false);
+
 } // namespace clangd
 } // namespace clang
 #endif
Index: clang-tools-extra/clangd/SourceCode.cpp
===================================================================
--- clang-tools-extra/clangd/SourceCode.cpp
+++ clang-tools-extra/clangd/SourceCode.cpp
@@ -50,6 +50,44 @@
 namespace clang {
 namespace clangd {
 
+namespace {
+
+// Is Line an #if or #ifdef directive?
+static bool isIf(llvm::StringRef Line) {
+  Line = Line.ltrim();
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim();
+  return Line.startswith("if");
+}
+
+// Is Line an #error directive mentioning includes?
+static bool isErrorAboutInclude(llvm::StringRef Line) {
+  Line = Line.ltrim();
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim();
+  if (!Line.startswith("error"))
+    return false;
+  return Line.contains_insensitive(
+      "includ"); // Matches "include" or "including".
+}
+
+// Heuristically headers that only want to be included via an umbrella.
+static bool isDontIncludeMeHeader(llvm::StringRef Content) {
+  llvm::StringRef Line;
+  // Only sniff up to 100 lines or 10KB.
+  Content = Content.take_front(100 * 100);
+  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
+    std::tie(Line, Content) = Content.split('\n');
+    if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
+      return true;
+  }
+  return false;
+}
+
+} // namespace
+
 // Here be dragons. LSP positions use columns measured in *UTF-16 code units*!
 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
 
@@ -1182,5 +1220,20 @@
   return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT);
 }
 
+bool isSelfContainedHeader(const Preprocessor &PP, FileID FID,
+                           const FileEntry *FE, bool ExpensiveCheck) {
+  // FIXME: Should files that have been #import'd be considered
+  // self-contained? That's really a property of the includer,
+  // not of the file.
+  if (!PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
+      !PP.getHeaderSearchInfo().hasFileBeenImported(FE))
+    return false;
+  // This pattern indicates that a header can't be used without
+  // particular preprocessor state, usually set up by another header.
+  if (ExpensiveCheck)
+    return !isDontIncludeMeHeader(PP.getSourceManager().getBufferData(FID));
+  return true;
+}
+
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/IncludeCleaner.h
===================================================================
--- clang-tools-extra/clangd/IncludeCleaner.h
+++ clang-tools-extra/clangd/IncludeCleaner.h
@@ -52,7 +52,7 @@
 /// The output only includes things SourceManager sees as files (not macro IDs).
 /// This can include <built-in>, <scratch space> etc that are not true files.
 llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
-                                           const SourceManager &SM);
+                                           const Preprocessor &PP);
 
 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
 /// FileIDs that are not true files (<built-in> etc) are dropped.
Index: clang-tools-extra/clangd/IncludeCleaner.cpp
===================================================================
--- clang-tools-extra/clangd/IncludeCleaner.cpp
+++ clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Syntax/Tokens.h"
@@ -234,20 +235,37 @@
 
 llvm::DenseSet<FileID>
 findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
-                    const SourceManager &SM) {
+                    const Preprocessor &PP) {
   std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
   llvm::sort(Sorted); // Group by FileID.
-  ReferencedFiles Result(SM);
+  const SourceManager &SM = PP.getSourceManager();
+  ReferencedFiles Files(SM);
   for (auto It = Sorted.begin(); It < Sorted.end();) {
     FileID FID = SM.getFileID(*It);
-    Result.add(FID, *It);
+    Files.add(FID, *It);
     // Cheaply skip over all the other locations from the same FileID.
     // This avoids lots of redundant Loc->File lookups for the same file.
     do
       ++It;
     while (It != Sorted.end() && SM.isInFileID(*It, FID));
   }
-  return std::move(Result.Files);
+  // If a header is not self-contained, we consider its symbols a logical part
+  // of the including file. Therefore, mark the parents of all used
+  // non-self-contained FileIDs as used. Perform this on FileIDs rather than
+  // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
+  llvm::DenseSet<FileID> Result;
+  for (FileID ID : Files.Files) {
+    // Unroll the chain of non self-contained headers to get to the first one
+    // with the header guard.
+    for (const FileEntry *FE = SM.getFileEntryForID(ID);
+         ID != SM.getMainFileID() && FE &&
+         !isSelfContainedHeader(PP, ID, FE);) {
+      ID = SM.getFileID(SM.getIncludeLoc(ID));
+      FE = SM.getFileEntryForID(ID);
+    }
+    Result.insert(ID);
+  }
+  return Result;
 }
 
 std::vector<const Inclusion *>
@@ -304,12 +322,7 @@
   const auto &SM = AST.getSourceManager();
 
   auto Refs = findReferencedLocations(AST);
-  // FIXME(kirillbobyrev): Attribute the symbols from non self-contained
-  // headers to their parents while the information about respective
-  // SourceLocations and FileIDs is not lost. It is not possible to do that
-  // later when non-guarded headers included multiple times will get same
-  // HeaderID.
-  auto ReferencedFileIDs = findReferencedFiles(Refs, SM);
+  auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getPreprocessor());
   auto ReferencedHeaders =
       translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
   return getUnused(AST, ReferencedHeaders);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to