hokein updated this revision to Diff 126378.
hokein marked 5 inline comments as done.
hokein added a comment.
Address comments on SymbolID.
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D40897
Files:
clangd/CMakeLists.txt
clangd/index/CMakeLists.txt
clangd/index/Index.cpp
clangd/index/Index.h
clangd/index/SymbolCollector.cpp
clangd/index/SymbolCollector.h
unittests/clangd/CMakeLists.txt
unittests/clangd/SymbolCollectorTests.cpp
Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -0,0 +1,112 @@
+//===-- SymbolCollectorTests.cpp -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/SymbolCollector.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include <memory>
+#include <string>
+
+using testing::UnorderedElementsAre;
+using testing::Eq;
+using testing::Field;
+
+// GMock matcher: the slab entry's symbol has the qualified name `Name`.
+MATCHER_P(QName, Name, "") { return arg.second.QualifiedName == Name; }
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Frontend action factory used by the tests. Every call to create() installs
+// a fresh SymbolCollector in `Collector` and returns an indexing action that
+// feeds that collector.
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+public:
+  SymbolIndexActionFactory() = default;
+
+  // Builds an indexing action that reports to a newly created collector.
+  // The action is released from its unique_ptr, so the caller owns the
+  // returned pointer.
+  clang::FrontendAction *create() override {
+    Collector = std::make_shared<SymbolCollector>();
+
+    index::IndexingOptions Opts;
+    Opts.SystemSymbolFilter =
+        index::IndexingOptions::SystemSymbolFilterKind::All;
+    Opts.IndexFunctionLocals = false;
+
+    auto OwnedAction = index::createIndexingAction(Collector, Opts, nullptr);
+    return OwnedAction.release();
+  }
+
+  // The collector attached to the most recently created action.
+  std::shared_ptr<SymbolCollector> Collector;
+};
+
+// Test fixture that indexes an in-memory header / main-file pair and exposes
+// the collected symbols through `Symbols`.
+class SymbolCollectorTest : public ::testing::Test {
+public:
+  // Runs the SymbolCollector over the in-memory file "symbol.cc", whose
+  // content is an #include of "symbols.h" (holding HeaderCode) followed by
+  // MainCode. The collected symbols are stored in `Symbols`.
+  //
+  // Returns the result of the tool invocation, so that callers can detect a
+  // failed run instead of inspecting a possibly empty symbol set.
+  bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) {
+    llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+        new vfs::InMemoryFileSystem);
+    llvm::IntrusiveRefCntPtr<FileManager> Files(
+        new FileManager(FileSystemOptions(), InMemoryFileSystem));
+
+    const std::string FileName = "symbol.cc";
+    const std::string HeaderName = "symbols.h";
+    auto Factory = llvm::make_unique<SymbolIndexActionFactory>();
+
+    // Factory->create() also installs the collector read back below.
+    tooling::ToolInvocation Invocation(
+        {"symbol_collector", "-fsyntax-only", "-std=c++11", FileName},
+        Factory->create(), Files.get(),
+        std::make_shared<PCHContainerOperations>());
+
+    InMemoryFileSystem->addFile(HeaderName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+
+    std::string Content = "#include\"" + HeaderName + "\"";
+    Content += "\n" + MainCode.str();
+    InMemoryFileSystem->addFile(FileName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(Content));
+
+    const bool Succeeded = Invocation.run();
+    Symbols = Factory->Collector->takeSymbols();
+
+    EXPECT_EQ(FileName, Factory->Collector->getFilename());
+    return Succeeded;
+  }
+
+protected:
+  // Symbols gathered by the last call to runSymbolCollector().
+  SymbolSlab Symbols;
+};
+
+// Symbols from both the header and the main file are expected in the slab;
+// the function declared inside the anonymous namespace is expected to be
+// skipped.
+TEST_F(SymbolCollectorTest, CollectSymbol) {
+  const std::string HeaderCode = R"(
+ class Foo {
+ void f();
+ };
+ void f1();
+ inline void f2() {}
+ )";
+  const std::string MainCode = R"(
+ namespace {
+ void ff() {} // ignore
+ }
+ void f1() {}
+ )";
+
+  runSymbolCollector(HeaderCode, MainCode);
+
+  EXPECT_THAT(Symbols,
+              UnorderedElementsAre(QName("Foo"), QName("Foo::f"), QName("f1"),
+                                   QName("f2")));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -15,12 +15,14 @@
JSONExprTests.cpp
TestFS.cpp
TraceTests.cpp
+ SymbolCollectorTests.cpp
)
target_link_libraries(ClangdTests
PRIVATE
clangBasic
clangDaemon
+ clangdIndex
clangFormat
clangFrontend
clangSema
Index: clangd/index/SymbolCollector.h
===================================================================
--- /dev/null
+++ clangd/index/SymbolCollector.h
@@ -0,0 +1,52 @@
+//===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
+
+#include "Index.h"
+
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/Index/IndexSymbol.h"
+
+#include <string>
+#include <utility>
+
+namespace clang {
+namespace clangd {
+
+// Collect all symbols from an AST.
+//
+// Clients (e.g. clangd) can use SymbolCollector together with
+// index::indexTopLevelDecls to retrieve all symbols when the source file is
+// changed.
+class SymbolCollector : public index::IndexDataConsumer {
+public:
+  SymbolCollector() = default;
+
+  // Records the name of the main file of the AST about to be indexed.
+  void initialize(ASTContext &Ctx) override;
+
+  // Called for every symbol occurrence found by the indexer; stores the
+  // declarations and definitions of interest in the slab.
+  bool
+  handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
+                      ArrayRef<index::SymbolRelation> Relations, FileID FID,
+                      unsigned Offset,
+                      index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
+
+  // The main file name recorded by initialize(). The returned reference
+  // points into this object.
+  StringRef getFilename() const { return Filename; }
+
+  // Freezes the slab: no symbol may be inserted afterwards.
+  void finish() override;
+
+  // Transfers the collected symbols to the caller. This is deliberately not
+  // const: std::move on a member of a const object would silently copy the
+  // whole slab. The collector's own slab is left in a moved-from state.
+  SymbolSlab takeSymbols() { return std::move(Symbols); }
+
+private:
+  // The file path where the AST comes from.
+  std::string Filename;
+
+  // All Symbols collected from the AST.
+  SymbolSlab Symbols;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- /dev/null
+++ clangd/index/SymbolCollector.cpp
@@ -0,0 +1,99 @@
+//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolCollector.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Index/IndexSymbol.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Make the Path absolute using the current working directory of the given
+// SourceManager if the Path is not an absolute path.
+//
+// The Path can be a path relative to the build directory, or retrieved from
+// the SourceManager.
+//
+// If the parent directory of the resulting path is known to the FileManager,
+// it is replaced by the directory's canonical name so that a path reached
+// through a symlinked directory maps to the real location. A failure to make
+// the path absolute is reported on stderr and the function carries on with
+// whatever path it has.
+std::string makeAbsolutePath(const SourceManager &SM, StringRef Path) {
+  llvm::SmallString<128> AbsolutePath(Path);
+  if (std::error_code EC =
+          SM.getFileManager().getVirtualFileSystem()->makeAbsolute(
+              AbsolutePath))
+    // Quote the offending path and report the error text after it.
+    llvm::errs() << "Warning: could not make absolute file: '" << Path
+                 << "': " << EC.message() << '\n';
+  // Handle symbolic link path cases.
+  // We are trying to get the real file path of the symlink.
+  const DirectoryEntry *Dir = SM.getFileManager().getDirectory(
+      llvm::sys::path::parent_path(AbsolutePath.str()));
+  if (Dir) {
+    StringRef DirName = SM.getFileManager().getCanonicalName(Dir);
+    llvm::SmallString<128> AbsoluteFilename;
+    llvm::sys::path::append(AbsoluteFilename, DirName,
+                            llvm::sys::path::filename(AbsolutePath.str()));
+    return AbsoluteFilename.str().str();
+  }
+  return AbsolutePath.str().str();
+}
+} // namespace
+
+// Records the real path name of the main file of the AST that is about to be
+// indexed. The name is left empty when the main file has no file entry.
+void SymbolCollector::initialize(ASTContext &Ctx) {
+  const auto &SM = Ctx.getSourceManager();
+  const auto *Entry = SM.getFileEntryForID(SM.getMainFileID());
+  // getFileEntryForID() returns null when the FileID is not backed by a file
+  // entry; do not dereference it in that case.
+  if (!Entry) {
+    Filename.clear();
+    return;
+  }
+  Filename = Entry->tryGetRealPathName();
+}
+
+// Stores each declaration or definition of a named symbol with external
+// formal linkage (and outside anonymous namespaces) in the slab, keyed by the
+// hash of its USR. An occurrence whose ID is already present is skipped.
+//
+// Always return true to continue indexing.
+bool SymbolCollector::handleDeclOccurence(
+    const Decl *D, index::SymbolRoleSet Roles,
+    ArrayRef<index::SymbolRelation> Relations, FileID FID, unsigned Offset,
+    index::IndexDataConsumer::ASTNodeInfo ASTNode) {
+  // FIXME: collect all symbol references.
+  const bool IsDeclaration =
+      (Roles & static_cast<unsigned>(index::SymbolRole::Declaration)) != 0;
+  const bool IsDefinition =
+      (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) != 0;
+  if (!IsDeclaration && !IsDefinition)
+    return true;
+
+  const NamedDecl *Named = llvm::dyn_cast<NamedDecl>(D);
+  if (!Named)
+    return true;
+
+  // FIXME: Should we include the internal linkage symbols?
+  const bool IsIndexable =
+      Named->hasExternalFormalLinkage() && !Named->isInAnonymousNamespace();
+  if (!IsIndexable)
+    return true;
+
+  // generateUSRForDecl() returns true when no USR could be produced.
+  llvm::SmallVector<char, 128> USRBuffer;
+  if (index::generateUSRForDecl(Named, USRBuffer))
+    return true;
+
+  const std::string USR(USRBuffer.data(), USRBuffer.size());
+  SymbolID ID = SymbolID(USR);
+  const bool AlreadyKnown = Symbols.find(ID) != Symbols.end();
+  if (AlreadyKnown)
+    return true;
+
+  auto &SM = Named->getASTContext().getSourceManager();
+  SymbolLocation Location = {
+      makeAbsolutePath(SM, SM.getFilename(D->getLocation())),
+      SM.getFileOffset(D->getLocStart()), SM.getFileOffset(D->getLocEnd())};
+  Symbols.insert({std::move(ID), Named->getQualifiedNameAsString(),
+                  index::getSymbolInfo(D), std::move(Location)});
+  return true;
+}
+
+void SymbolCollector::finish() {
+ Symbols.freeze();
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/index/Index.h
===================================================================
--- /dev/null
+++ clangd/index/Index.h
@@ -0,0 +1,125 @@
+//===--- Index.h ------------------------------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
+
+#include "clang/Index/IndexSymbol.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <array>
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+struct SymbolLocation {
+ // The absolute path of the source file in which the symbol occurs.
+ std::string FilePath;
+ // The 0-based offset, from the beginning of the source file, of the first
+ // character of the symbol.
+ unsigned StartOffset;
+ // The 0-based offset, from the beginning of the source file, of the last
+ // character of the symbol.
+ unsigned EndOffset;
+};
+
+// The class identifies a particular C++ symbol (class, function, method, etc).
+//
+// As USRs (Unified Symbol Resolution) could be large, especially for functions
+// with long type arguments, SymbolID stores the 160-bit SHA1 hash of the USR
+// instead of the USR itself. Distinct USRs are expected, though not strictly
+// guaranteed, to yield distinct hashes, while using a relatively small amount
+// of memory (vs storing USRs directly).
+//
+// SymbolID can be used as key in the symbol indexes to lookup the symbol.
+struct SymbolID {
+  // Creates an ID whose hash bytes are all zero.
+  SymbolID() = default;
+  // Creates the ID of the symbol with the given USR.
+  SymbolID(llvm::StringRef USR);
+  // SHA1 digest of the USR. It is zero-initialized so that a
+  // default-constructed ID never exposes indeterminate bytes when it is
+  // compared or hashed.
+  std::array<uint8_t, 20> HashValue = {};
+};
+
+// Represents a C++ symbol, e.g. a class or a function.
+//
+// FIXME: instead of each symbol owning a copy of its fields, the storage
+// could be shared through SymbolSlab.
+struct Symbol {
+ // The ID of the symbol.
+ SymbolID ID;
+ // The qualified name of the symbol, e.g. Foo::bar.
+ std::string QualifiedName;
+ // The symbol information, like symbol kind.
+ index::SymbolInfo SymInfo;
+ // The location of the canonical declaration of the symbol.
+ //
+ // A C++ symbol can have several declarations and one definition (e.g. a
+ // function declared in a ".h" file and defined in a ".cc" file).
+ // * For classes, the canonical declaration is usually the definition.
+ // * For non-inline functions, the canonical declaration is a declaration
+ // (not a definition), which usually lives in a ".h" file.
+ SymbolLocation CanonicalDeclaration;
+
+ // FIXME: add definition location of the symbol.
+ // FIXME: add all occurrences support.
+ // FIXME: add extra fields for index scoring signals.
+ // FIXME: add code completion information.
+};
+
+// Owning container for a set of symbols, keyed by SymbolID. The symbols live
+// as long as the slab does.
+//
+// FIXME: Use a space-efficient implementation, a lot of Symbol fields could
+// share the same storage.
+class SymbolSlab {
+public:
+  using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;
+
+  SymbolSlab() = default;
+
+  // Stores S under S.ID. Must not be called once the slab has been frozen.
+  void insert(Symbol S);
+
+  // Marks the slab as complete: no symbol may be added afterwards. There is
+  // no way to undo this.
+  void freeze();
+
+  // Read-only iteration over the (SymbolID, Symbol) entries.
+  const_iterator begin() const;
+  const_iterator end() const;
+
+  // Looks up the entry stored under SymID; yields end() when there is none.
+  const_iterator find(const SymbolID &SymID) const;
+
+private:
+  bool Frozen = false;
+
+  llvm::DenseMap<SymbolID, Symbol> Symbols;
+};
+
+} // namespace clangd
+} // namespace clang
+
+namespace llvm {
+
+// Lets SymbolID be used as the key type of an llvm::DenseMap.
+//
+// The two sentinel keys are the IDs of fixed marker strings. Each is hashed
+// only once and cached in a function-local static: DenseMap asks for these
+// keys on every probe, so recomputing a SHA1 per call would be pure overhead.
+template <> struct DenseMapInfo<clang::clangd::SymbolID> {
+  static inline clang::clangd::SymbolID getEmptyKey() {
+    static const clang::clangd::SymbolID EmptyKey("EMPTYKEY");
+    return EmptyKey;
+  }
+  static inline clang::clangd::SymbolID getTombstoneKey() {
+    static const clang::clangd::SymbolID TombstoneKey("TOMBSTONEKEY");
+    return TombstoneKey;
+  }
+  // Hashes the 20 digest bytes of the ID.
+  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
+    return hash_value(
+        ArrayRef<uint8_t>(Sym.HashValue.data(), Sym.HashValue.size()));
+  }
+  // Two IDs are equal when their digests are equal byte for byte.
+  static bool isEqual(const clang::clangd::SymbolID &LHS,
+                      const clang::clangd::SymbolID &RHS) {
+    return LHS.HashValue == RHS.HashValue;
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
Index: clangd/index/Index.cpp
===================================================================
--- /dev/null
+++ clangd/index/Index.cpp
@@ -0,0 +1,49 @@
+//===--- Index.cpp -----------------------------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Index.h"
+
+#include "llvm/Support/SHA1.h"
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Views the characters of S as a sequence of bytes. Nothing is copied: the
+// result refers to the same memory as S.
+ArrayRef<uint8_t> toArrayRef(StringRef S) {
+  const auto *Bytes = reinterpret_cast<const uint8_t *>(S.data());
+  return ArrayRef<uint8_t>(Bytes, S.size());
+}
+} // namespace
+
+// Builds the ID from the SHA1 digest of the bytes of USR.
+SymbolID::SymbolID(llvm::StringRef USR) {
+  const ArrayRef<uint8_t> USRBytes = toArrayRef(USR);
+  HashValue = llvm::SHA1::hash(USRBytes);
+}
+
+// Iterator to the first stored (SymbolID, Symbol) entry.
+auto SymbolSlab::begin() const -> const_iterator { return Symbols.begin(); }
+
+// Past-the-end iterator of the stored entries.
+auto SymbolSlab::end() const -> const_iterator { return Symbols.end(); }
+
+// Iterator to the entry stored under SymID, or end() when there is none.
+auto SymbolSlab::find(const SymbolID &SymID) const -> const_iterator {
+  return Symbols.find(SymID);
+}
+
+// Marks the slab as frozen; insert() asserts that this has not happened.
+void SymbolSlab::freeze() { Frozen = true; }
+
+// Stores S under its own ID, replacing any symbol already stored under that
+// ID. Inserting into a frozen slab is a programming error (checked by assert).
+void SymbolSlab::insert(Symbol S) {
+  assert(!Frozen &&
+         "Can't insert a symbol after the slab has been frozen!");
+  Symbol &Slot = Symbols[S.ID];
+  Slot = std::move(S);
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/index/CMakeLists.txt
===================================================================
--- /dev/null
+++ clangd/index/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_clang_library(clangdIndex
+ Index.cpp
+ SymbolCollector.cpp
+
+ LINK_LIBS
+ clangAST
+ clangBasic
+ clangIndex
+ ${LLVM_PTHREAD_LIB}
+ )
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -22,6 +22,7 @@
LINK_LIBS
clangAST
clangBasic
+ clangdIndex
clangFormat
clangFrontend
clangIndex
@@ -38,3 +39,4 @@
add_subdirectory(fuzzer)
endif()
add_subdirectory(tool)
+add_subdirectory(index)
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits