sammccall updated this revision to Diff 396639.
sammccall marked 4 inline comments as done.
sammccall added a comment.

address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114077/new/

https://reviews.llvm.org/D114077

Files:
  clang-tools-extra/clangd/Headers.cpp
  clang-tools-extra/clangd/Headers.h
  clang-tools-extra/clangd/IncludeCleaner.cpp
  clang-tools-extra/clangd/IncludeCleaner.h
  clang-tools-extra/clangd/tool/ClangdMain.cpp
  clang-tools-extra/clangd/unittests/HeadersTests.cpp
  clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Index: clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -8,7 +8,9 @@
 
 #include "Annotations.h"
 #include "IncludeCleaner.h"
+#include "SourceCode.h"
 #include "TestTU.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -18,7 +20,9 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
 using ::testing::IsEmpty;
+using ::testing::Pointee;
 using ::testing::UnorderedElementsAre;
 
 std::string guard(llvm::StringRef Code) {
@@ -211,7 +215,7 @@
     auto AST = TU.build();
 
     std::vector<Position> Points;
-    for (const auto &Loc : findReferencedLocations(AST)) {
+    for (const auto &Loc : findReferencedLocations(AST).User) {
       if (AST.getSourceManager().getBufferName(Loc).endswith(
               TU.HeaderFilename)) {
         Points.push_back(offsetToPosition(
@@ -225,6 +229,82 @@
   }
 }
 
+TEST(IncludeCleaner, Stdlib) {
+  // Smoke tests only for finding used symbols/headers.
+  // Details of Decl -> stdlib::Symbol -> stdlib::Headers mapping tested there.
+  auto TU = TestTU::withHeaderCode(R"cpp(
+    namespace std { class error_code {}; }
+    class error_code {};
+    namespace nonstd { class error_code {}; }
+  )cpp");
+  struct {
+    llvm::StringRef Code;
+    std::vector<llvm::StringRef> Symbols;
+    std::vector<llvm::StringRef> Headers;
+  } Tests[] = {
+      {"std::error_code x;", {"std::error_code"}, {"<system_error>"}},
+      {"error_code x;", {}, {}},
+      {"nonstd::error_code x;", {}, {}},
+  };
+
+  for (const auto &Test : Tests) {
+    TU.Code = Test.Code.str();
+    ParsedAST AST = TU.build();
+    std::vector<stdlib::Symbol> WantSyms;
+    for (const auto &SymName : Test.Symbols) {
+      auto QName = splitQualifiedName(SymName);
+      auto Sym = stdlib::Symbol::named(QName.first, QName.second);
+      EXPECT_TRUE(Sym) << SymName;
+      WantSyms.push_back(*Sym);
+    }
+    std::vector<stdlib::Header> WantHeaders;
+    for (const auto &HeaderName : Test.Headers) {
+      auto Header = stdlib::Header::named(HeaderName);
+      EXPECT_TRUE(Header) << HeaderName;
+      WantHeaders.push_back(*Header);
+    }
+
+    ReferencedLocations Locs = findReferencedLocations(AST);
+    EXPECT_THAT(Locs.Stdlib, ElementsAreArray(WantSyms));
+    ReferencedFiles Files = findReferencedFiles(Locs, AST.getIncludeStructure(),
+                                                AST.getSourceManager());
+    EXPECT_THAT(Files.Stdlib, ElementsAreArray(WantHeaders));
+  }
+}
+
+MATCHER_P(WrittenInclusion, Written, "") {
+  if (arg.Written != Written)
+    *result_listener << arg.Written;
+  return arg.Written == Written;
+}
+
+TEST(IncludeCleaner, StdlibUnused) {
+  setIncludeCleanerAnalyzesStdlib(true);
+  auto Cleanup =
+      llvm::make_scope_exit([] { setIncludeCleanerAnalyzesStdlib(false); });
+
+  auto TU = TestTU::withCode(R"cpp(
+    #include <list>
+    #include <queue>
+    std::list<int> x;
+  )cpp");
+  // Layout of std library impl is not relevant.
+  TU.AdditionalFiles["bits"] = R"cpp(
+    #pragma once
+    namespace std {
+      template <typename> class list {};
+      template <typename> class queue {};
+    }
+  )cpp";
+  TU.AdditionalFiles["list"] = "#include <bits>";
+  TU.AdditionalFiles["queue"] = "#include <bits>";
+  TU.ExtraArgs = {"-isystem", testRoot()};
+  auto AST = TU.build();
+
+  auto Unused = computeUnusedIncludes(AST);
+  EXPECT_THAT(Unused, ElementsAre(Pointee(WrittenInclusion("<queue>"))));
+}
+
 TEST(IncludeCleaner, GetUnusedHeaders) {
   llvm::StringLiteral MainFile = R"cpp(
     #include "a.h"
@@ -301,7 +381,7 @@
   auto ReferencedFiles =
       findReferencedFiles(findReferencedLocations(AST), Includes, SM);
   llvm::StringSet<> ReferencedFileNames;
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note we deduped the names as _number_ of <built-in>s is uninteresting.
@@ -352,7 +432,7 @@
                           AST.getIncludeStructure(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note that we have uplifted the referenced files from non self-contained
@@ -386,7 +466,7 @@
                           AST.getIncludeStructure(), AST.getSourceManager());
   llvm::StringSet<> ReferencedFileNames;
   auto &SM = AST.getSourceManager();
-  for (FileID FID : ReferencedFiles)
+  for (FileID FID : ReferencedFiles.User)
     ReferencedFileNames.insert(
         SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
   // Note that we have uplifted the referenced files from non self-contained
@@ -406,7 +486,7 @@
   auto ReferencedFiles =
       findReferencedFiles(findReferencedLocations(AST),
                           AST.getIncludeStructure(), AST.getSourceManager());
-  EXPECT_TRUE(ReferencedFiles.empty());
+  EXPECT_TRUE(ReferencedFiles.User.empty());
   EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty()));
 }
 
Index: clang-tools-extra/clangd/unittests/HeadersTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -408,6 +409,58 @@
   EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes)));
 }
 
+TEST(StdlibTest, All) {
+  auto VectorH = stdlib::Header::named("<vector>");
+  EXPECT_TRUE(VectorH);
+  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
+  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
+
+  auto Vector = stdlib::Symbol::named("std::", "vector");
+  EXPECT_TRUE(Vector);
+  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
+  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
+  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
+
+  EXPECT_EQ(Vector->header(), *VectorH);
+  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
+}
+
+TEST(StdlibTest, Recognizer) {
+  auto TU = TestTU::withCode(R"cpp(
+    namespace std {
+    inline namespace inl {
+
+    template <typename>
+    struct vector { class nested {}; };
+
+    class secret {};
+
+    } // inl
+    } // std
+
+    class vector {};
+    std::vector<int> vec;
+    std::vector<int>::nested nest;
+    std::secret sec;
+  )cpp");
+
+  auto AST = TU.build();
+  auto &vector_nonstd = findDecl(AST, "vector");
+  auto *vec =
+      cast<VarDecl>(findDecl(AST, "vec")).getType()->getAsCXXRecordDecl();
+  auto *nest =
+      cast<VarDecl>(findDecl(AST, "nest")).getType()->getAsCXXRecordDecl();
+  auto *sec =
+      cast<VarDecl>(findDecl(AST, "sec")).getType()->getAsCXXRecordDecl();
+
+  stdlib::Recognizer recognizer;
+
+  EXPECT_EQ(recognizer(&vector_nonstd), llvm::None);
+  EXPECT_EQ(recognizer(vec), stdlib::Symbol::named("std::", "vector"));
+  EXPECT_EQ(recognizer(nest), stdlib::Symbol::named("std::", "vector"));
+  EXPECT_EQ(recognizer(sec), llvm::None);
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/tool/ClangdMain.cpp
===================================================================
--- clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -12,6 +12,7 @@
 #include "Config.h"
 #include "ConfigProvider.h"
 #include "Feature.h"
+#include "IncludeCleaner.h"
 #include "PathMapping.h"
 #include "Protocol.h"
 #include "TidyProvider.h"
@@ -251,6 +252,15 @@
             "Never insert #include directives as part of code completion")),
 };
 
+opt<bool> IncludeCleanerStdlib{
+    "include-cleaner-stdlib",
+    cat(Features),
+    desc("Apply include-cleaner analysis to standard library headers "
+         "(immature!)"),
+    init(false),
+    Hidden,
+};
+
 opt<bool> HeaderInsertionDecorators{
     "header-insertion-decorators",
     cat(Features),
@@ -932,6 +942,7 @@
   };
   if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding)
     Opts.Encoding = ForceOffsetEncoding;
+  setIncludeCleanerAnalyzesStdlib(IncludeCleanerStdlib);
 
   if (CheckFile.getNumOccurrences()) {
     llvm::SmallString<256> Path;
Index: clang-tools-extra/clangd/IncludeCleaner.h
===================================================================
--- clang-tools-extra/clangd/IncludeCleaner.h
+++ clang-tools-extra/clangd/IncludeCleaner.h
@@ -30,7 +30,11 @@
 namespace clang {
 namespace clangd {
 
-using ReferencedLocations = llvm::DenseSet<SourceLocation>;
+struct ReferencedLocations {
+  llvm::DenseSet<SourceLocation> User;
+  llvm::DenseSet<stdlib::Symbol> Stdlib;
+};
+
 /// Finds locations of all symbols used in the main file.
 ///
 /// - RecursiveASTVisitor finds references to symbols and records their
@@ -48,17 +52,22 @@
 /// - err on the side of reporting all possible locations
 ReferencedLocations findReferencedLocations(ParsedAST &AST);
 
+struct ReferencedFiles {
+  llvm::DenseSet<FileID> User;
+  llvm::DenseSet<stdlib::Header> Stdlib;
+};
+
 /// Retrieves IDs of all files containing SourceLocations from \p Locs.
 /// The output only includes things SourceManager sees as files (not macro IDs).
 /// This can include <built-in>, <scratch space> etc that are not true files.
-llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
-                                           const IncludeStructure &Includes,
-                                           const SourceManager &SM);
+ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
+                                    const IncludeStructure &Includes,
+                                    const SourceManager &SM);
 
 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).
 /// FileIDs that are not true files (<built-in> etc) are dropped.
 llvm::DenseSet<IncludeStructure::HeaderID>
-translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
+translateToHeaderIDs(const ReferencedFiles &Files,
                      const IncludeStructure &Includes, const SourceManager &SM);
 
 /// Retrieves headers that are referenced from the main file but not used.
@@ -72,6 +81,15 @@
 std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST,
                                                  llvm::StringRef Code);
 
+/// Affects whether standard library includes should be considered for removal.
+/// This is off by default for now due to implementation limitations:
+/// - macros are not tracked
+/// - symbol names without a unique associated header are not tracked
+/// - references to std-namespaced C types are not properly tracked:
+///   instead of std::size_t -> <cstddef> we see ::size_t -> <stddef.h>
+/// FIXME: remove this hack once the implementation is good enough.
+void setIncludeCleanerAnalyzesStdlib(bool B);
+
 } // namespace clangd
 } // namespace clang
 
Index: clang-tools-extra/clangd/IncludeCleaner.cpp
===================================================================
--- clang-tools-extra/clangd/IncludeCleaner.cpp
+++ clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -26,6 +26,10 @@
 
 namespace clang {
 namespace clangd {
+
+static bool AnalyzeStdlib = false;
+void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; }
+
 namespace {
 
 /// Crawler traverses the AST and feeds in the locations of (sometimes
@@ -127,6 +131,10 @@
   void add(const Decl *D) {
     if (!D || !isNew(D->getCanonicalDecl()))
       return;
+    if (auto SS = StdRecognizer(D)) {
+      Result.Stdlib.insert(*SS);
+      return;
+    }
     // Special case RecordDecls, as it is common for them to be forward
     // declared multiple times. The most common cases are:
     // - Definition available in TU, only mark that one as usage. The rest is
@@ -136,14 +144,14 @@
     //   redecls.
     if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) {
       if (const auto *Definition = RD->getDefinition()) {
-        Result.insert(Definition->getLocation());
+        Result.User.insert(Definition->getLocation());
         return;
       }
       if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation()))
         return;
     }
     for (const Decl *Redecl : D->redecls())
-      Result.insert(Redecl->getLocation());
+      Result.User.insert(Redecl->getLocation());
   }
 
   bool isNew(const void *P) { return P && Visited.insert(P).second; }
@@ -151,13 +159,14 @@
   ReferencedLocations &Result;
   llvm::DenseSet<const void *> Visited;
   const SourceManager &SM;
+  stdlib::Recognizer StdRecognizer;
 };
 
 // Given a set of referenced FileIDs, determines all the potentially-referenced
 // files and macros by traversing expansion/spelling locations of macro IDs.
 // This is used to map the referenced SourceLocations onto real files.
-struct ReferencedFiles {
-  ReferencedFiles(const SourceManager &SM) : SM(SM) {}
+struct ReferencedFilesBuilder {
+  ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {}
   llvm::DenseSet<FileID> Files;
   llvm::DenseSet<FileID> Macros;
   const SourceManager &SM;
@@ -218,18 +227,23 @@
       continue;
     auto Loc = Macro->Info->getDefinitionLoc();
     if (Loc.isValid())
-      Result.insert(Loc);
+      Result.User.insert(Loc);
+    // FIXME: support stdlib macros
   }
 }
 
-bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
+static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
+  if (Inc.BehindPragmaKeep)
+    return false;
+
   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
-  // Standard Library headers are typically umbrella headers, and system
-  // headers are likely to be the Standard Library headers. Until we have a
-  // good support for umbrella headers and Standard Library headers, don't warn
-  // about them.
-  if (Inc.Written.front() == '<' || Inc.BehindPragmaKeep)
+  // System headers are likely to be standard library headers.
+  // Until we have good support for umbrella headers, don't warn about them.
+  if (Inc.Written.front() == '<') {
+    if (AnalyzeStdlib && stdlib::Header::named(Inc.Written))
+      return true;
     return false;
+  }
   // Headers without include guards have side effects and are not
   // self-contained, skip them.
   assert(Inc.HeaderID);
@@ -282,29 +296,36 @@
   return Result;
 }
 
-llvm::DenseSet<FileID>
-findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
-                    const IncludeStructure &Includes, const SourceManager &SM) {
-  std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
+ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs,
+                                    const IncludeStructure &Includes,
+                                    const SourceManager &SM) {
+  std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()};
   llvm::sort(Sorted); // Group by FileID.
-  ReferencedFiles Files(SM);
+  ReferencedFilesBuilder Builder(SM);
   for (auto It = Sorted.begin(); It < Sorted.end();) {
     FileID FID = SM.getFileID(*It);
-    Files.add(FID, *It);
+    Builder.add(FID, *It);
     // Cheaply skip over all the other locations from the same FileID.
     // This avoids lots of redundant Loc->File lookups for the same file.
     do
       ++It;
     while (It != Sorted.end() && SM.isInFileID(*It, FID));
   }
+
   // If a header is not self-contained, we consider its symbols a logical part
   // of the including file. Therefore, mark the parents of all used
   // non-self-contained FileIDs as used. Perform this on FileIDs rather than
   // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
-  llvm::DenseSet<FileID> Result;
-  for (FileID ID : Files.Files)
-    Result.insert(headerResponsible(ID, SM, Includes));
-  return Result;
+  llvm::DenseSet<FileID> UserFiles;
+  for (FileID ID : Builder.Files)
+    UserFiles.insert(headerResponsible(ID, SM, Includes));
+
+  llvm::DenseSet<stdlib::Header> StdlibFiles;
+  for (const auto &Symbol : Locs.Stdlib)
+    for (const auto &Header : Symbol.headers())
+      StdlibFiles.insert(Header);
+
+  return {std::move(UserFiles), std::move(StdlibFiles)};
 }
 
 std::vector<const Inclusion *>
@@ -338,13 +359,13 @@
 #endif
 
 llvm::DenseSet<IncludeStructure::HeaderID>
-translateToHeaderIDs(const llvm::DenseSet<FileID> &Files,
+translateToHeaderIDs(const ReferencedFiles &Files,
                      const IncludeStructure &Includes,
                      const SourceManager &SM) {
   trace::Span Tracer("IncludeCleaner::translateToHeaderIDs");
   llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs;
-  TranslatedHeaderIDs.reserve(Files.size());
-  for (FileID FID : Files) {
+  TranslatedHeaderIDs.reserve(Files.User.size());
+  for (FileID FID : Files.User) {
     const FileEntry *FE = SM.getFileEntryForID(FID);
     if (!FE) {
       assert(isSpecialBuffer(FID, SM));
@@ -354,6 +375,9 @@
     assert(File);
     TranslatedHeaderIDs.insert(*File);
   }
+  for (stdlib::Header StdlibUsed : Files.Stdlib)
+    for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed))
+      TranslatedHeaderIDs.insert(HID);
   return TranslatedHeaderIDs;
 }
 
Index: clang-tools-extra/clangd/Headers.h
===================================================================
--- clang-tools-extra/clangd/Headers.h
+++ clang-tools-extra/clangd/Headers.h
@@ -32,8 +32,78 @@
 #include <string>
 
 namespace clang {
+class Decl;
+class NamespaceDecl;
 namespace clangd {
 
+// clangd has a built-in database of standard library symbols.
+namespace stdlib {
+
+// A standard library header, such as <iostream>
+// Lightweight class, in fact just an index into a table.
+class Header {
+public:
+  static llvm::Optional<Header> named(llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
+    return OS << H.name();
+  }
+  llvm::StringRef name() const;
+
+private:
+  Header(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Symbol;
+  friend llvm::DenseMapInfo<Header>;
+  friend bool operator==(const Header &L, const Header &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A top-level standard library symbol, such as std::vector
+// Lightweight class, in fact just an index into a table.
+class Symbol {
+public:
+  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
+                                      llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
+    return OS << S.scope() << S.name();
+  }
+  llvm::StringRef scope() const;
+  llvm::StringRef name() const;
+  // The preferred header for this symbol (e.g. the suggested insertion).
+  Header header() const;
+  // Some symbols may be provided my multiple headers.
+  llvm::SmallVector<Header> headers() const;
+
+private:
+  Symbol(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Recognizer;
+  friend llvm::DenseMapInfo<Symbol>;
+  friend bool operator==(const Symbol &L, const Symbol &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A functor to find the stdlib::Symbol associated with a decl.
+//
+// For non-top-level decls (std::vector<int>::iterator), returns the top-level
+// symbol (std::vector).
+class Recognizer {
+public:
+  Recognizer();
+  llvm::Optional<Symbol> operator()(const Decl *D);
+
+private:
+  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
+  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
+};
+
+} // namespace stdlib
+
 /// Returns true if \p Include is literal include like "path" or <path>.
 bool isLiteralInclude(llvm::StringRef Include);
 
@@ -160,6 +230,8 @@
   // Maps HeaderID to the ids of the files included from it.
   llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
 
+  llvm::DenseMap<stdlib::Header, llvm::SmallVector<HeaderID>> StdlibHeaders;
+
   std::vector<Inclusion> MainFileIncludes;
 
   // We reserve HeaderID(0) for the main file and will manually check for that
@@ -270,6 +342,38 @@
   }
 };
 
+template <> struct DenseMapInfo<clang::clangd::stdlib::Header> {
+  static inline clang::clangd::stdlib::Header getEmptyKey() {
+    return clang::clangd::stdlib::Header(-1);
+  }
+  static inline clang::clangd::stdlib::Header getTombstoneKey() {
+    return clang::clangd::stdlib::Header(-2);
+  }
+  static unsigned getHashValue(const clang::clangd::stdlib::Header &H) {
+    return hash_value(H.ID);
+  }
+  static bool isEqual(const clang::clangd::stdlib::Header &LHS,
+                      const clang::clangd::stdlib::Header &RHS) {
+    return LHS == RHS;
+  }
+};
+
+template <> struct DenseMapInfo<clang::clangd::stdlib::Symbol> {
+  static inline clang::clangd::stdlib::Symbol getEmptyKey() {
+    return clang::clangd::stdlib::Symbol(-1);
+  }
+  static inline clang::clangd::stdlib::Symbol getTombstoneKey() {
+    return clang::clangd::stdlib::Symbol(-2);
+  }
+  static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) {
+    return hash_value(S.ID);
+  }
+  static bool isEqual(const clang::clangd::stdlib::Symbol &LHS,
+                      const clang::clangd::stdlib::Symbol &RHS) {
+    return LHS == RHS;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
Index: clang-tools-extra/clangd/Headers.cpp
===================================================================
--- clang-tools-extra/clangd/Headers.cpp
+++ clang-tools-extra/clangd/Headers.cpp
@@ -61,10 +61,19 @@
           SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1;
       Inc.FileKind = FileKind;
       Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID();
-      if (File)
-        Inc.HeaderID = static_cast<unsigned>(Out->getOrCreateID(File));
       if (LastPragmaKeepInMainFileLine == Inc.HashLine)
         Inc.BehindPragmaKeep = true;
+      if (File) {
+        IncludeStructure::HeaderID HID = Out->getOrCreateID(File);
+        Inc.HeaderID = static_cast<unsigned>(HID);
+        if (IsAngled)
+          if (auto StdlibHeader = stdlib::Header::named(Inc.Written)) {
+            auto &IDs = Out->StdlibHeaders[*StdlibHeader];
+            // Few physical files for one stdlib header name, linear scan is ok.
+            if (!llvm::is_contained(IDs, HID))
+              IDs.push_back(HID);
+          }
+      }
     }
 
     // Record include graph (not just for main-file includes)
@@ -340,5 +349,155 @@
          std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine,
                   RHS.Resolved, RHS.Written);
 }
+
+namespace stdlib {
+static llvm::StringRef *HeaderNames;
+static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
+static unsigned *SymbolHeaderIDs;
+static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+// Maps symbol name -> Symbol::ID, within a namespace.
+using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
+
+static int initialize() {
+  unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+#include "CSymbolMap.inc"
+#include "StdSymbolMap.inc"
+#undef SYMBOL
+  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
+  SymbolHeaderIDs =
+      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
+  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
+  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+
+  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
+    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+    if (R.second)
+      R.first->second = new NSSymbolMap();
+    return *R.first->second;
+  };
+
+  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
+    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+  };
+
+  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
+                              llvm::StringRef HeaderName) mutable {
+    if (NS == "None")
+      NS = "";
+
+    SymbolNames[SymIndex] = {NS, Name};
+    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+
+    NSSymbolMap &NSSymbols = AddNS(NS);
+    NSSymbols.try_emplace(Name, SymIndex);
+
+    ++SymIndex;
+  };
+#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
+#include "CSymbolMap.inc"
+#include "StdSymbolMap.inc"
+#undef SYMBOL
+
+  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
+  for (const auto &E : *HeaderIDs)
+    HeaderNames[E.second] = E.first;
+
+  return 0;
+}
+
+static void ensureInitialized() {
+  static int Dummy = initialize();
+  (void)Dummy;
+}
+
+llvm::Optional<Header> Header::named(llvm::StringRef Name) {
+  ensureInitialized();
+  auto It = HeaderIDs->find(Name);
+  if (It == HeaderIDs->end())
+    return llvm::None;
+  return Header(It->second);
+}
+llvm::StringRef Header::name() const { return HeaderNames[ID]; }
+llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
+llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
+llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
+                                     llvm::StringRef Name) {
+  ensureInitialized();
+  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+    auto It = NSSymbols->find(Name);
+    if (It != NSSymbols->end())
+      return Symbol(It->second);
+  }
+  return llvm::None;
+}
+Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+llvm::SmallVector<Header> Symbol::headers() const {
+  return {header()}; // FIXME: multiple in case of ambiguity
+}
+
+Recognizer::Recognizer() { ensureInitialized(); }
+
+NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+  auto It = NamespaceCache.find(D);
+  if (It != NamespaceCache.end())
+    return It->second;
+
+  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
+    if (!D) // Nullptr means the global namespace
+      return NamespaceSymbols->lookup("");
+    if (D->isAnonymousNamespace())
+      return nullptr;
+    if (D->isInlineNamespace()) {
+      if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
+        return namespaceSymbols(Parent);
+      return nullptr;
+    }
+    return NamespaceSymbols->lookup(printNamespaceScope(*D));
+  }();
+  NamespaceCache.try_emplace(D, Result);
+  return Result;
+}
+
+llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
+  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
+  // We keep all the candidate DCs as some may turn out to be anon enums.
+  // Do this resolution lazily as we may turn out not to have a std namespace.
+  llvm::SmallVector<const DeclContext *> IntermediateDecl;
+  const DeclContext *DC = D->getDeclContext();
+  while (DC && !DC->isNamespace()) {
+    if (NamedDecl::classofKind(DC->getDeclKind()))
+      IntermediateDecl.push_back(DC);
+    DC = DC->getParent();
+  }
+  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
+  if (!Symbols)
+    return llvm::None;
+
+  llvm::StringRef Name = [&]() -> llvm::StringRef {
+    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
+      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
+      if (const auto *II = N.getAsIdentifierInfo())
+        return II->getName();
+      if (!N.isEmpty())
+        return ""; // e.g. operator<: give up
+    }
+    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
+      if (const auto *II = ND->getIdentifier())
+        return II->getName();
+    return "";
+  }();
+  if (Name.empty())
+    return llvm::None;
+
+  auto It = Symbols->find(Name);
+  if (It == Symbols->end())
+    return llvm::None;
+  return Symbol(It->second);
+}
+
+} // namespace stdlib
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to