ioeric updated this revision to Diff 126775.
ioeric added a comment.
- Remove everything except for SymbolIndex interface and MemIndex
- Added unit tests for MemIndex
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D40548
Files:
clangd/CMakeLists.txt
clangd/index/Index.cpp
clangd/index/Index.h
clangd/index/MemIndex.cpp
clangd/index/MemIndex.h
unittests/clangd/CMakeLists.txt
unittests/clangd/IndexTests.cpp
Index: unittests/clangd/IndexTests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/IndexTests.cpp
@@ -0,0 +1,118 @@
+//===-- IndexTests.cpp -------------------------------*- C++ -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) {
+ Symbol Sym;
+ Sym.ID = SymbolID(ID);
+ Sym.QualifiedName = ID;
+ return Sym;
+}
+
+struct CountedSymbolSlab {
+ CountedSymbolSlab() = delete;
+
+ explicit CountedSymbolSlab(int &Counter) : Counter(Counter) { ++Counter; }
+
+ ~CountedSymbolSlab() { Counter--; }
+
+ SymbolSlab Slab;
+ // A counter for the number of living slabs.
+ int &Counter;
+};
+
+class MemIndexTest : public ::testing::Test {
+protected:
+ std::vector<std::string>
+ match(std::shared_ptr<std::vector<const Symbol *>> Symbols,
+ const FuzzyFindRequest &Req) {
+ Index.build(std::move(Symbols));
+ std::vector<std::string> Matches;
+ Index.fuzzyFind(
+ Req, [&](const Symbol &Sym) { Matches.push_back(Sym.QualifiedName); });
+ return Matches;
+ }
+
+ // Build a CountedSymbolSlab containing symbols with IDs and names [Begin,
+ // End]. The life time of the slab is managed by the returned shared_ptr,
+ // which points to a vector of pointers pointing to all symbols in the slab.
+ std::shared_ptr<std::vector<const Symbol *>>
+ generateNumSymbols(int Begin, int End) {
+ auto Slab = std::make_shared<CountedSymbolSlab>(SlabCounter);
+
+ for (int i = Begin; i <= End; i++)
+ Slab->Slab.insert(symbol(std::to_string(i)));
+ auto *Symbols = new std::vector<const Symbol *>();
+
+ for (const auto &Sym : Slab->Slab)
+ Symbols->push_back(&Sym.second);
+ return std::shared_ptr<std::vector<const Symbol *>>(std::move(Slab),
+ Symbols);
+ }
+
+ int SlabCounter = 0;
+ MemIndex Index;
+};
+
+TEST_F(MemIndexTest, MemIndexSymbolsRecycled) {
+ FuzzyFindRequest Req;
+ Req.Query = "7";
+ auto Matches = match(generateNumSymbols(0, 10), Req);
+ EXPECT_THAT(Matches, UnorderedElementsAre("7"));
+
+ EXPECT_EQ(SlabCounter, 1);
+ // Release old symbols.
+ Index.build(std::make_shared<std::vector<const Symbol *>>());
+ EXPECT_EQ(SlabCounter, 0);
+}
+
+TEST_F(MemIndexTest, MemIndexMatchSubstring) {
+ FuzzyFindRequest Req;
+ Req.Query = "5";
+ auto Matches = match(generateNumSymbols(5, 25), Req);
+ EXPECT_THAT(Matches, UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST_F(MemIndexTest, MemIndexDeduplicate) {
+ auto Symbols = generateNumSymbols(0, 10);
+
+ // Inject some duplicates and make sure we only match the same symbol once.
+ auto Sym = symbol("7");
+ Symbols->push_back(&Sym);
+ Symbols->push_back(&Sym);
+ Symbols->push_back(&Sym);
+
+ FuzzyFindRequest Req;
+ Req.Query = "7";
+ auto Matches = match(std::move(Symbols), Req);
+ EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST_F(MemIndexTest, MemIndexLimitedNumMatches) {
+ FuzzyFindRequest Req;
+ Req.Query = "5";
+ Req.MaxCandidateCount = 3;
+ auto Matches = match(generateNumSymbols(0, 100), Req);
+ EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -13,6 +13,7 @@
CodeCompleteTests.cpp
ContextTests.cpp
FuzzyMatchTests.cpp
+ IndexTests.cpp
JSONExprTests.cpp
TestFS.cpp
TraceTests.cpp
Index: clangd/index/MemIndex.h
===================================================================
--- /dev/null
+++ clangd/index/MemIndex.h
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. -------------- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+ /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
+ /// accessible as long as `Symbols` is kept alive.
+ void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+ bool fuzzyFind(const FuzzyFindRequest &Req,
+ std::function<void(const Symbol &)> Callback) const override;
+
+private:
+ std::shared_ptr<std::vector<const Symbol *>> Symbols;
+ // Index is a set of symbols that are deduplicated by symbol IDs.
+ // FIXME: build smarter index structure.
+ llvm::DenseMap<SymbolID, const Symbol *> Index;
+ mutable std::mutex Mutex;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
Index: clangd/index/MemIndex.cpp
===================================================================
--- /dev/null
+++ clangd/index/MemIndex.cpp
@@ -0,0 +1,51 @@
+//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+
+#include "MemIndex.h"
+
+namespace clang {
+namespace clangd {
+
+void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+
+ llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+ for (const Symbol *Sym : *Syms)
+ TempIndex[Sym->ID] = Sym;
+
+ // Swap out the old symbols and index.
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Index = std::move(TempIndex);
+ Symbols = std::move(Syms); // Relase old symbols.
+ }
+}
+
+bool MemIndex::fuzzyFind(const FuzzyFindRequest &Req,
+ std::function<void(const Symbol &)> Callback) const {
+ std::string LoweredQuery = llvm::StringRef(Req.Query).lower();
+ unsigned Matched = 0;
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ for (const auto Pair : Index) {
+ const Symbol *Sym = Pair.second;
+ // Find all symbols that contain the query, igoring cases.
+ // FIXME: use better matching algorithm, e.g. fuzzy matcher.
+ if (StringRef(StringRef(Sym->QualifiedName).lower())
+ .contains(LoweredQuery)) {
+ if (++Matched > Req.MaxCandidateCount)
+ return false;
+ Callback(*Sym);
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -110,6 +110,33 @@
llvm::DenseMap<SymbolID, Symbol> Symbols;
};
+struct FuzzyFindRequest {
+ /// \brief A query string for the fuzzy find. This is matched against symbols'
+ /// qualfified names.
+ std::string Query;
+ /// \brief The maxinum number of candidates to return.
+ size_t MaxCandidateCount = UINT_MAX;
+};
+
+/// \brief Interface for symbol indexes that can be used for searching or
+/// matching symbols among a set of symbols based on names or unique IDs.
+class SymbolIndex {
+public:
+ virtual ~SymbolIndex() = default;
+
+ /// \brief Matches symbols in the index fuzzily and applies \p Callback on
+ /// each matched symbol.
+ ///
+ /// Returns true if all candidates are matched.
+ virtual bool
+ fuzzyFind(const FuzzyFindRequest &Req,
+ std::function<void(const Symbol &)> Callback) const = 0;
+
+ // FIXME: add interfaces for more index use cases:
+ // - Symbol getSymbolInfo(llvm::StringRef USR);
+ // - getAllOccurrences(llvm::StringRef USR);
+};
+
} // namespace clangd
} // namespace clang
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Index.h"
-
+#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/Support/SHA1.h"
namespace clang {
@@ -20,6 +20,7 @@
}
} // namespace
+
SymbolID::SymbolID(llvm::StringRef USR)
: HashValue(llvm::SHA1::hash(toArrayRef(USR))) {}
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -19,6 +19,7 @@
Protocol.cpp
ProtocolHandlers.cpp
Trace.cpp
+ index/MemIndex.cpp
index/Index.cpp
index/SymbolCollector.cpp
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits