hokein created this revision.
hokein added a reviewer: sammccall.
Herald added subscribers: arphaman, jkorous, MaskRay, ioeric, ilya-biryukov.

This is the first step of implementing Xrefs in clangd:

- add index interfaces, and related data structures.
- implement a naive in-memory index for symbol occurrences.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D49658

Files:
  clangd/FindSymbols.h
  clangd/index/FileIndex.cpp
  clangd/index/FileIndex.h
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/MemIndex.cpp
  clangd/index/MemIndex.h
  clangd/index/Merge.cpp
  unittests/clangd/CodeCompleteTests.cpp
  unittests/clangd/IndexTests.cpp

Index: unittests/clangd/IndexTests.cpp
===================================================================
--- unittests/clangd/IndexTests.cpp
+++ unittests/clangd/IndexTests.cpp
@@ -34,7 +34,22 @@
   return Sym;
 }
 
+SymbolOccurrence symbolOccurrence(SymbolOccurrenceKind Kind,
+                                  llvm::StringRef FileURI,
+                                  uint32_t Line, uint32_t Column) {
+  SymbolOccurrence Occurrence;
+  Occurrence.Kind = Kind;
+  Occurrence.Location.FileURI = FileURI;
+  Occurrence.Location.Start.Line = Occurrence.Location.End.Line;
+  Occurrence.Location.Start.Column = Column;
+  Occurrence.Location.End.Column = Column;
+  return Occurrence;
+}
+
 MATCHER_P(Named, N, "") { return arg.Name == N; }
+MATCHER_P(Occurrence, L, "") {
+  return arg == L;
+}
 
 TEST(SymbolSlab, FindAndIterate) {
   SymbolSlab::Builder B;
@@ -235,6 +250,53 @@
   EXPECT_THAT(lookup(I, SymbolID("ns::nonono")), UnorderedElementsAre());
 }
 
+std::vector<SymbolOccurrence> findOccurrences(const SymbolIndex &I,
+                                              OccurrencesRequest Req) {
+  std::vector<SymbolOccurrence> Results;
+  I.findOccurrences(Req, [&](const SymbolOccurrence &Occurrence) {
+    Results.push_back(Occurrence);
+  });
+  return Results;
+}
+
+TEST(MemIndexTest, findOccurrences) {
+  auto FooDecl = symbolOccurrence(SymbolOccurrenceKind::Declaration,
+                              "file:///foo.h", 1, 2);
+  auto FooDef = symbolOccurrence(SymbolOccurrenceKind::Definition,
+                              "file:///foo.cc", 1, 2);
+  auto FooRef = symbolOccurrence(SymbolOccurrenceKind::Reference,
+                              "file:///call_foo.cc", 1, 2);
+  auto BarDecl = symbolOccurrence(SymbolOccurrenceKind::Declaration,
+                                  "file:///bar.h", 1, 2);
+  SymbolOccurrenceSlab::Builder Occurrences;
+  Occurrences.insert(SymbolID("foo"), FooDecl);
+  Occurrences.insert(SymbolID("foo"), FooDef);
+  Occurrences.insert(SymbolID("foo"), FooRef);
+  Occurrences.insert(SymbolID("bar"), BarDecl);
+  auto Empty = std::make_shared<std::vector<const Symbol*>>();
+  MemIndex I;
+  I.build(Empty, std::move(Occurrences).build());
+
+  OccurrencesRequest Req;
+  Req.IDs.insert(SymbolID("foo"));
+  Req.Filter = SymbolOccurrenceKind::Declaration;
+  EXPECT_THAT(findOccurrences(I, Req),
+              UnorderedElementsAre(Occurrence(FooDecl)));
+
+  Req.Filter = SymbolOccurrenceKind::Definition;
+  EXPECT_THAT(findOccurrences(I, Req),
+              UnorderedElementsAre(Occurrence(FooDef)));
+
+  Req.Filter = SymbolOccurrenceKind::Declaration |
+               SymbolOccurrenceKind::Definition |
+               SymbolOccurrenceKind::Reference;
+
+  EXPECT_THAT(findOccurrences(I, Req),
+              UnorderedElementsAre(Occurrence(FooDecl),
+                                   Occurrence(FooDef),
+                                   Occurrence(FooRef)));
+}
+
 TEST(MergeIndexTest, Lookup) {
   MemIndex I, J;
   I.build(generateSymbols({"ns::A", "ns::B"}));
Index: unittests/clangd/CodeCompleteTests.cpp
===================================================================
--- unittests/clangd/CodeCompleteTests.cpp
+++ unittests/clangd/CodeCompleteTests.cpp
@@ -891,6 +891,10 @@
   void lookup(const LookupRequest &,
               llvm::function_ref<void(const Symbol &)>) const override {}
 
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override {}
+
   const std::vector<FuzzyFindRequest> allRequests() const { return Requests; }
 
 private:
Index: clangd/index/Merge.cpp
===================================================================
--- clangd/index/Merge.cpp
+++ clangd/index/Merge.cpp
@@ -74,6 +74,12 @@
         Callback(*Sym);
   }
 
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override {
+    // FIXME(hokein): implement it.
+  }
+
 private:
   const SymbolIndex *Dynamic, *Static;
 };
Index: clangd/index/MemIndex.h
===================================================================
--- clangd/index/MemIndex.h
+++ clangd/index/MemIndex.h
@@ -22,24 +22,31 @@
 public:
   /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
   /// accessible as long as `Symbols` is kept alive.
-  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols,
+             SymbolOccurrenceSlab OccurrenceSlab = {});
 
   /// \brief Build index from a symbol slab.
   static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab);
 
   bool
   fuzzyFind(const FuzzyFindRequest &Req,
             llvm::function_ref<void(const Symbol &)> Callback) const override;
 
-  virtual void
+  void
   lookup(const LookupRequest &Req,
          llvm::function_ref<void(const Symbol &)> Callback) const override;
 
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override;
+
 private:
   std::shared_ptr<std::vector<const Symbol *>> Symbols;
   // Index is a set of symbols that are deduplicated by symbol IDs.
   // FIXME: build smarter index structure.
   llvm::DenseMap<SymbolID, const Symbol *> Index;
+
+  SymbolOccurrenceSlab Occurrences;
   mutable std::mutex Mutex;
 };
 
Index: clangd/index/MemIndex.cpp
===================================================================
--- clangd/index/MemIndex.cpp
+++ clangd/index/MemIndex.cpp
@@ -15,7 +15,8 @@
 namespace clang {
 namespace clangd {
 
-void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms,
+                     SymbolOccurrenceSlab OccurrenceSlab) {
   llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
   for (const Symbol *Sym : *Syms)
     TempIndex[Sym->ID] = Sym;
@@ -25,6 +26,7 @@
     std::lock_guard<std::mutex> Lock(Mutex);
     Index = std::move(TempIndex);
     Symbols = std::move(Syms); // Relase old symbols.
+    Occurrences = std::move(OccurrenceSlab);
   }
 }
 
@@ -87,5 +89,17 @@
   return std::move(MemIdx);
 }
 
+void MemIndex::findOccurrences(
+    const OccurrencesRequest &Req,
+    llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
+  for (const auto &ID : Req.IDs) {
+    for (const auto& Occurrence : Occurrences.find(ID)) {
+      if (static_cast<uint32_t>(Req.Filter & Occurrence.Kind)) {
+       Callback(Occurrence);
+      }
+    }
+  }
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -17,6 +17,8 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/StringSaver.h"
 #include <array>
 #include <string>
 
@@ -215,7 +217,6 @@
   // Optional details of the symbol.
   const Details *Detail = nullptr;
 
-  // FIXME: add all occurrences support.
   // FIXME: add extra fields for index scoring signals.
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
@@ -280,6 +281,96 @@
   std::vector<Symbol> Symbols;  // Sorted by SymbolID to allow lookup.
 };
 
+// Describes the kind of a symbol occurrence.
+//
+// This is a bitfield which can be combined from different kinds.
+enum class SymbolOccurrenceKind : uint8_t {
+  Unknown = 0,
+  Declaration = static_cast<uint8_t>(index::SymbolRole::Declaration),
+  Definition = static_cast<uint8_t>(index::SymbolRole::Definition),
+  Reference = static_cast<uint8_t>(index::SymbolRole::Reference),
+};
+inline SymbolOccurrenceKind operator|(SymbolOccurrenceKind L,
+                                      SymbolOccurrenceKind R) {
+  return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(L) |
+                                           static_cast<uint8_t>(R));
+}
+inline SymbolOccurrenceKind &operator|=(SymbolOccurrenceKind &L,
+                                        SymbolOccurrenceKind R) {
+  return L = L | R;
+}
+inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A,
+                                      SymbolOccurrenceKind B) {
+  return static_cast<SymbolOccurrenceKind>(static_cast<uint8_t>(A) &
+                                           static_cast<uint8_t>(B));
+}
+raw_ostream &operator<<(raw_ostream &, SymbolOccurrenceKind);
+
+// Represents a symbol occurrence in the source file. It could be a
+// declaration/definition/reference occurrence.
+struct SymbolOccurrence {
+  // The location of the occurrence.
+  // WARNING: Location does not own the underlying data - FileURI are owned by a
+  // SymbolOccurrenceSlab. Copies are shallow.
+  SymbolLocation Location;
+  SymbolOccurrenceKind Kind;
+};
+inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R){
+  return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind);
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolOccurrence &S);
+
+// An efficient structure of storing large set of symbol occurrences in memory.
+// Filenames are deduplicated.
+class SymbolOccurrenceSlab {
+ public:
+  SymbolOccurrenceSlab() = default;
+
+  SymbolOccurrenceSlab(SymbolOccurrenceSlab &&Old)
+      : Arena(std::move(Old.Arena)), Occurrences(std::move(Old.Occurrences)) {}
+  SymbolOccurrenceSlab &operator=(SymbolOccurrenceSlab &&Old) {
+    Arena = std::move(Old.Arena);
+    Occurrences = std::move(Old.Occurrences);
+    return *this;
+  }
+
+  llvm::ArrayRef<SymbolOccurrence> find(const SymbolID& ID) const {
+    auto It = Occurrences.find(ID);
+    if (It == Occurrences.end())
+      return {};
+    return It->second;
+  }
+
+  // A mutable container that can 'freeze' to SymbolOccurrenceSlab.
+  // The frozen OccurrenceSlab will use less memory.
+  class Builder {
+  public:
+    Builder() : UniqueStrings(Arena) {}
+    // Adds a symbol occurrence.
+    // This is a deep copy: underlying FileURI will be owned by the slab.
+    void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence);
+
+    // Consumes the builder to finalize the slab.
+    SymbolOccurrenceSlab build() &&;
+
+  private:
+    llvm::BumpPtrAllocator Arena;
+    UniqueStringSaver UniqueStrings;
+
+    llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences;
+  };
+
+private:
+  SymbolOccurrenceSlab(
+      llvm::BumpPtrAllocator Arena,
+      llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences)
+      : Arena(std::move(Arena)), Occurrences(std::move(Occurrences)) {}
+
+  llvm::BumpPtrAllocator Arena;
+  llvm::DenseMap<SymbolID, std::vector<SymbolOccurrence>> Occurrences;
+};
+
 struct FuzzyFindRequest {
   /// \brief A query string for the fuzzy find. This is matched against symbols'
   /// un-qualified identifiers and should not contain qualifiers like "::".
@@ -305,6 +396,11 @@
   llvm::DenseSet<SymbolID> IDs;
 };
 
+struct OccurrencesRequest {
+  llvm::DenseSet<SymbolID> IDs;
+  SymbolOccurrenceKind Filter;
+};
+
 /// \brief Interface for symbol indexes that can be used for searching or
 /// matching symbols among a set of symbols based on names or unique IDs.
 class SymbolIndex {
@@ -327,8 +423,14 @@
   lookup(const LookupRequest &Req,
          llvm::function_ref<void(const Symbol &)> Callback) const = 0;
 
-  // FIXME: add interfaces for more index use cases:
-  //  - getAllOccurrences(SymbolID);
+  /// CrossReference finds all symbol occurrences (e.g. references,
+  /// declarations, definitions) and applies \p Callback on each result.
+  ///
+  /// The API is not responsible to rank results.
+  /// The returned result must be deep-copied if it's used outside Callback.
+  virtual void findOccurrences(
+      const OccurrencesRequest &Req,
+      llvm::function_ref<void(const SymbolOccurrence &)> Callback) const = 0;
 };
 
 } // namespace clangd
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -134,5 +134,34 @@
   return SymbolSlab(std::move(NewArena), std::move(Symbols));
 }
 
+raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) {
+  if (K == SymbolOccurrenceKind::Unknown)
+    return OS << "unknown";
+  if (K == SymbolOccurrenceKind::Declaration)
+    return OS << "declaration";
+  if (K == SymbolOccurrenceKind::Definition)
+    return OS << "definition";
+  if (K == SymbolOccurrenceKind::Reference)
+    return OS << "reference";
+  return OS;
+}
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolOccurrence &S) {
+  OS << S.Location << S.Kind;
+  return OS;
+}
+
+void SymbolOccurrenceSlab::Builder::insert(const SymbolID &SymID,
+                                           const SymbolOccurrence &Occurrence) {
+  auto& SymOccurrences = Occurrences[SymID];
+  SymOccurrences.push_back(Occurrence);
+  SymOccurrences.back().Location.FileURI =
+      UniqueStrings.save(Occurrence.Location.FileURI);
+}
+
+// Consumes the builder to finalize the slab.
+SymbolOccurrenceSlab SymbolOccurrenceSlab::Builder::build() && {
+  return SymbolOccurrenceSlab(std::move(Arena), std::move(Occurrences));
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/index/FileIndex.h
===================================================================
--- clangd/index/FileIndex.h
+++ clangd/index/FileIndex.h
@@ -73,6 +73,10 @@
   void lookup(const LookupRequest &Req,
               llvm::function_ref<void(const Symbol &)> Callback) const override;
 
+
+  void findOccurrences(const OccurrencesRequest &Req,
+                       llvm::function_ref<void(const SymbolOccurrence &)>
+                           Callback) const override;
 private:
   FileSymbols FSymbols;
   MemIndex Index;
Index: clangd/index/FileIndex.cpp
===================================================================
--- clangd/index/FileIndex.cpp
+++ clangd/index/FileIndex.cpp
@@ -105,5 +105,11 @@
   Index.lookup(Req, Callback);
 }
 
+void FileIndex::findOccurrences(
+    const OccurrencesRequest &Req,
+    llvm::function_ref<void(const SymbolOccurrence &)> Callback) const {
+  // FIXME(hokein): implement it.
+}
+
 } // namespace clangd
 } // namespace clang
Index: clangd/FindSymbols.h
===================================================================
--- clangd/FindSymbols.h
+++ clangd/FindSymbols.h
@@ -17,9 +17,9 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace clang {
-class ParsedAST;
 namespace clangd {
 class SymbolIndex;
+class ParsedAST;
 
 /// Searches for the symbols matching \p Query. The syntax of \p Query can be
 /// the non-qualified name or fully qualified of a symbol. For example, "vector"
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to