sammccall created this revision.
sammccall added reviewers: ioeric, hokein.
Herald added subscribers: cfe-commits, jkorous-apple, ilya-biryukov, klimek.
(This isn't done! Needs new tests, and some random cleanups should be split out.
Looking for some early feedback.)
Within a TU:
- as now, collect a declaration from the first occurrence of a symbol (taking
clang's canonical declaration)
- when we first see a definition occurrence, copy the symbol and add it
Across TUs/sources:
- mergeSymbol in Merge.h is responsible for combining matching Symbols. This
covers dynamic/static merges and cross-TU merges in the static index.
- it prefers declarations from Symbols that have a definition.
- GlobalSymbolBuilderMain is modified to use mergeSymbol as a reduce step.
Random cleanups (can be pulled out):
- SymbolFromYAML -> SymbolsFromYAML, new singular SymbolFromYAML added
- avoid uninit'd SymbolLocations. Add an idiomatic way to check "absent".
- CanonicalDeclaration (as well as Definition) are mapped as optional in YAML.
- added operator<< for Symbol & SymbolLocation, for debugging
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D42942
Files:
clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
clangd/index/Index.cpp
clangd/index/Index.h
clangd/index/Merge.cpp
clangd/index/SymbolCollector.cpp
clangd/index/SymbolCollector.h
clangd/index/SymbolYAML.cpp
clangd/index/SymbolYAML.h
clangd/tool/ClangdMain.cpp
unittests/clangd/SymbolCollectorTests.cpp
Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -439,18 +439,18 @@
...
)";
- auto Symbols1 = SymbolFromYAML(YAML1);
+ auto Symbols1 = SymbolsFromYAML(YAML1);
EXPECT_THAT(Symbols1, UnorderedElementsAre(
AllOf(QName("clang::Foo1"), Labeled("Foo1-label"),
Doc("Foo doc"), Detail("int"))));
- auto Symbols2 = SymbolFromYAML(YAML2);
+ auto Symbols2 = SymbolsFromYAML(YAML2);
EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(QName("clang::Foo2"),
Labeled("Foo2-label"),
Not(HasDetail()))));
std::string ConcatenatedYAML =
SymbolsToYAML(Symbols1) + SymbolsToYAML(Symbols2);
- auto ConcatenatedSymbols = SymbolFromYAML(ConcatenatedYAML);
+ auto ConcatenatedSymbols = SymbolsFromYAML(ConcatenatedYAML);
EXPECT_THAT(ConcatenatedSymbols,
UnorderedElementsAre(QName("clang::Foo1"),
QName("clang::Foo2")));
Index: clangd/tool/ClangdMain.cpp
===================================================================
--- clangd/tool/ClangdMain.cpp
+++ clangd/tool/ClangdMain.cpp
@@ -37,7 +37,7 @@
llvm::errs() << "Can't open " << YamlSymbolFile << "\n";
return nullptr;
}
- auto Slab = SymbolFromYAML(Buffer.get()->getBuffer());
+ auto Slab = SymbolsFromYAML(Buffer.get()->getBuffer());
SymbolSlab::Builder SymsBuilder;
for (auto Sym : Slab)
SymsBuilder.insert(Sym);
Index: clangd/index/SymbolYAML.h
===================================================================
--- clangd/index/SymbolYAML.h
+++ clangd/index/SymbolYAML.h
@@ -25,7 +25,11 @@
namespace clangd {
// Read symbols from a YAML-format string.
-SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent);
+SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent);
+
+// Read one symbol from a YAML-format string, backed by the arena.
+Symbol SymbolFromYAML(llvm::StringRef YAMLContent,
+ llvm::BumpPtrAllocator &Arena);
// Convert a single symbol to YAML-format string.
// The YAML result is safe to concatenate.
Index: clangd/index/SymbolYAML.cpp
===================================================================
--- clangd/index/SymbolYAML.cpp
+++ clangd/index/SymbolYAML.cpp
@@ -97,7 +97,9 @@
IO.mapRequired("Name", Sym.Name);
IO.mapRequired("Scope", Sym.Scope);
IO.mapRequired("SymInfo", Sym.SymInfo);
- IO.mapRequired("CanonicalDeclaration", Sym.CanonicalDeclaration);
+ IO.mapOptional("CanonicalDeclaration", Sym.CanonicalDeclaration,
+ SymbolLocation());
+ IO.mapOptional("Definition", Sym.Definition, SymbolLocation());
IO.mapRequired("CompletionLabel", Sym.CompletionLabel);
IO.mapRequired("CompletionFilterText", Sym.CompletionFilterText);
IO.mapRequired("CompletionPlainInsertText", Sym.CompletionPlainInsertText);
@@ -160,7 +162,7 @@
namespace clang {
namespace clangd {
-SymbolSlab SymbolFromYAML(llvm::StringRef YAMLContent) {
+SymbolSlab SymbolsFromYAML(llvm::StringRef YAMLContent) {
// Store data of pointer fields (excl. `StringRef`) like `Detail`.
llvm::BumpPtrAllocator Arena;
llvm::yaml::Input Yin(YAMLContent, &Arena);
@@ -173,6 +175,14 @@
return std::move(Syms).build();
}
+Symbol SymbolFromYAML(llvm::StringRef YAMLContent,
+ llvm::BumpPtrAllocator &Arena) {
+ llvm::yaml::Input Yin(YAMLContent, &Arena);
+ Symbol S;
+ Yin >> S;
+ return S;
+}
+
std::string SymbolsToYAML(const SymbolSlab& Symbols) {
std::string Str;
llvm::raw_string_ostream OS(Str);
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -53,6 +53,9 @@
SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
private:
+ const Symbol *addDeclaration(const NamedDecl &, SymbolID);
+ void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
+
// All Symbols collected from the AST.
SymbolSlab::Builder Symbols;
ASTContext *ASTCtx;
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -117,30 +117,34 @@
// For symbols defined inside macros:
// * use expansion location, if the symbol is formed via macro concatenation.
// * use spelling location, otherwise.
-SymbolLocation GetSymbolLocation(const NamedDecl *D, SourceManager &SM,
+SymbolLocation getSymbolLocation(const NamedDecl &D, SourceManager &SM,
StringRef FallbackDir,
std::string &FilePathStorage) {
- SymbolLocation Location;
-
- SourceLocation Loc = SM.getSpellingLoc(D->getLocation());
- if (D->getLocation().isMacroID()) {
+ SourceLocation Loc = SM.getSpellingLoc(D.getLocation());
+ if (D.getLocation().isMacroID()) {
// The symbol is formed via macro concatenation, the spelling location will
// be "<scratch space>", which is not interesting to us, use the expansion
// location instead.
if (llvm::StringRef(Loc.printToString(SM)).startswith("<scratch")) {
FilePathStorage = makeAbsolutePath(
- SM, SM.getFilename(SM.getExpansionLoc(D->getLocation())),
+ SM, SM.getFilename(SM.getExpansionLoc(D.getLocation())),
FallbackDir);
- return {FilePathStorage,
- SM.getFileOffset(SM.getExpansionRange(D->getLocStart()).first),
- SM.getFileOffset(SM.getExpansionRange(D->getLocEnd()).second)};
+ SymbolLocation Result;
+ Result.FilePath = FilePathStorage;
+ Result.StartOffset =
+ SM.getFileOffset(SM.getExpansionRange(D.getLocStart()).first);
+ Result.EndOffset =
+ SM.getFileOffset(SM.getExpansionRange(D.getLocEnd()).second);
+ return Result;
}
}
FilePathStorage = makeAbsolutePath(SM, SM.getFilename(Loc), FallbackDir);
- return {FilePathStorage,
- SM.getFileOffset(SM.getSpellingLoc(D->getLocStart())),
- SM.getFileOffset(SM.getSpellingLoc(D->getLocEnd()))};
+ SymbolLocation Result;
+ Result.FilePath = FilePathStorage;
+ Result.StartOffset = SM.getFileOffset(SM.getSpellingLoc(D.getLocStart()));
+ Result.EndOffset = SM.getFileOffset(SM.getSpellingLoc(D.getLocEnd()));
+ return Result;
}
} // namespace
@@ -176,62 +180,85 @@
return true;
auto ID = SymbolID(USR);
- if (Symbols.find(ID) != nullptr)
- return true;
-
- auto &SM = ND->getASTContext().getSourceManager();
-
- std::string QName;
- llvm::raw_string_ostream OS(QName);
- PrintingPolicy Policy(ASTCtx->getLangOpts());
- // Note that inline namespaces are treated as transparent scopes. This
- // reflects the way they're most commonly used for lookup. Ideally we'd
- // include them, but at query time it's hard to find all the inline
- // namespaces to query: the preamble doesn't have a dedicated list.
- Policy.SuppressUnwrittenScope = true;
- ND->printQualifiedName(OS, Policy);
- OS.flush();
-
- Symbol S;
- S.ID = std::move(ID);
- std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
- S.SymInfo = index::getSymbolInfo(D);
- std::string FilePath;
- S.CanonicalDeclaration = GetSymbolLocation(ND, SM, Opts.FallbackDir, FilePath);
-
- // Add completion info.
- assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
- CodeCompletionResult SymbolCompletion(ND, 0);
- const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
- *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator,
- *CompletionTUInfo,
- /*IncludeBriefComments*/ true);
- std::string Label;
- std::string SnippetInsertText;
- std::string IgnoredLabel;
- std::string PlainInsertText;
- getLabelAndInsertText(*CCS, &Label, &SnippetInsertText,
- /*EnableSnippets=*/true);
- getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText,
- /*EnableSnippets=*/false);
- std::string FilterText = getFilterText(*CCS);
- std::string Documentation = getDocumentation(*CCS);
- std::string CompletionDetail = getDetail(*CCS);
-
- S.CompletionFilterText = FilterText;
- S.CompletionLabel = Label;
- S.CompletionPlainInsertText = PlainInsertText;
- S.CompletionSnippetInsertText = SnippetInsertText;
- Symbol::Details Detail;
- Detail.Documentation = Documentation;
- Detail.CompletionDetail = CompletionDetail;
- S.Detail = &Detail;
-
- Symbols.insert(S);
+ const Symbol* BasicSymbol = Symbols.find(ID);
+ if (!BasicSymbol) // Regardless of role, ND is the canonical declaration.
+ BasicSymbol = addDeclaration(*ND, std::move(ID));
+ if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
+ addDefinition(*cast<NamedDecl>(ASTNode.OrigD), *BasicSymbol);
}
-
return true;
}
+const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND,
+ SymbolID ID) {
+ auto &SM = ND.getASTContext().getSourceManager();
+
+ std::string QName;
+ llvm::raw_string_ostream OS(QName);
+ PrintingPolicy Policy(ASTCtx->getLangOpts());
+ // Note that inline namespaces are treated as transparent scopes. This
+ // reflects the way they're most commonly used for lookup. Ideally we'd
+ // include them, but at query time it's hard to find all the inline
+ // namespaces to query: the preamble doesn't have a dedicated list.
+ Policy.SuppressUnwrittenScope = true;
+ ND.printQualifiedName(OS, Policy);
+ OS.flush();
+
+ Symbol S;
+ S.ID = std::move(ID);
+ std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
+ S.SymInfo = index::getSymbolInfo(&ND);
+ std::string FilePath;
+ // FIXME: we may want a different "canonical" heuristic than clang chooses.
+ S.CanonicalDeclaration =
+ getSymbolLocation(ND, SM, Opts.FallbackDir, FilePath);
+
+ // Add completion info.
+ // FIXME: we may want to choose a different redecl, or combine from several.
+ assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
+ CodeCompletionResult SymbolCompletion(&ND, 0);
+ const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
+ *ASTCtx, *PP, CodeCompletionContext::CCC_Name, *CompletionAllocator,
+ *CompletionTUInfo,
+ /*IncludeBriefComments*/ true);
+ std::string Label;
+ std::string SnippetInsertText;
+ std::string IgnoredLabel;
+ std::string PlainInsertText;
+ getLabelAndInsertText(*CCS, &Label, &SnippetInsertText,
+ /*EnableSnippets=*/true);
+ getLabelAndInsertText(*CCS, &IgnoredLabel, &PlainInsertText,
+ /*EnableSnippets=*/false);
+ std::string FilterText = getFilterText(*CCS);
+ std::string Documentation = getDocumentation(*CCS);
+ std::string CompletionDetail = getDetail(*CCS);
+
+ S.CompletionFilterText = FilterText;
+ S.CompletionLabel = Label;
+ S.CompletionPlainInsertText = PlainInsertText;
+ S.CompletionSnippetInsertText = SnippetInsertText;
+ Symbol::Details Detail;
+ Detail.Documentation = Documentation;
+ Detail.CompletionDetail = CompletionDetail;
+ S.Detail = &Detail;
+
+ Symbols.insert(S);
+ return Symbols.find(S.ID);
+}
+
+void SymbolCollector::addDefinition(const NamedDecl &ND,
+ const Symbol &DeclSym) {
+ if (DeclSym.Definition)
+ return;
+ // If we saw some forward declaration, we end up copying the symbol.
+ // This is not ideal, but avoids duplicating the "is this a definition" check
+ // in clang::index. We should only see one definition.
+ Symbol S = DeclSym;
+ std::string FilePath;
+ S.Definition = getSymbolLocation(ND, ND.getASTContext().getSourceManager(),
+ Opts.FallbackDir, FilePath);
+ Symbols.insert(S);
+}
+
} // namespace clangd
} // namespace clang
Index: clangd/index/Merge.cpp
===================================================================
--- clangd/index/Merge.cpp
+++ clangd/index/Merge.cpp
@@ -63,7 +63,11 @@
Symbol S = L;
// For each optional field, fill it from R if missing in L.
// (It might be missing in R too, but that's a no-op).
- if (S.CanonicalDeclaration.FilePath == "")
+ if (!S.Definition)
+ S.Definition = R.Definition;
+ // Prefer the canonical declaration from TUs that saw the definition.
+ // Classes: this is the def itself. Functions: hopefully the header decl.
+ if (!S.CanonicalDeclaration || R.Definition)
S.CanonicalDeclaration = R.CanonicalDeclaration;
if (S.CompletionLabel == "")
S.CompletionLabel = R.CompletionLabel;
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -27,11 +27,14 @@
llvm::StringRef FilePath;
// The 0-based offset to the first character of the symbol from the beginning
// of the source file.
- unsigned StartOffset;
+ unsigned StartOffset = 0;
// The 0-based offset to the last character of the symbol from the beginning
// of the source file.
- unsigned EndOffset;
+ unsigned EndOffset = 0;
+
+ operator bool() const { return !FilePath.empty(); }
};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &);
// The class identifies a particular C++ symbol (class, function, method, etc).
//
@@ -44,7 +47,7 @@
class SymbolID {
public:
SymbolID() = default;
- SymbolID(llvm::StringRef USR);
+ explicit SymbolID(llvm::StringRef USR);
bool operator==(const SymbolID &Sym) const {
return HashValue == Sym.HashValue;
@@ -125,6 +128,7 @@
// * For non-inline functions, the canonical declaration is a declaration
// (not a definition), which is usually declared in ".h" file.
SymbolLocation CanonicalDeclaration;
+ SymbolLocation Definition;
/// A brief description of the symbol that can be displayed in the completion
/// candidate list. For example, "Foo(X x, Y y) const" is a labal for a
@@ -160,6 +164,7 @@
// FIXME: add all occurrences support.
// FIXME: add extra fields for index scoring signals.
};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
// An immutable symbol container that stores a set of symbols.
// The container will maintain the lifetime of the symbols.
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -10,11 +10,18 @@
#include "Index.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/SHA1.h"
+#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace clangd {
using namespace llvm;
+raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) {
+ if (!L)
+ return OS << "(none)";
+ return OS << L.FilePath << "[" << L.StartOffset << "-" << L.EndOffset << "]";
+}
+
SymbolID::SymbolID(StringRef USR)
: HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
@@ -29,6 +36,10 @@
std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin());
}
+raw_ostream &operator<<(raw_ostream &OS, const Symbol &S) {
+ return OS << S.Scope << S.Name;
+}
+
SymbolSlab::const_iterator SymbolSlab::find(const SymbolID &ID) const {
auto It = std::lower_bound(Symbols.begin(), Symbols.end(), ID,
[](const Symbol &S, const SymbolID &I) {
@@ -55,6 +66,7 @@
Intern(S.Name);
Intern(S.Scope);
Intern(S.CanonicalDeclaration.FilePath);
+ Intern(S.Definition.FilePath);
Intern(S.CompletionLabel);
Intern(S.CompletionFilterText);
Index: clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
===================================================================
--- clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
+++ clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
@@ -14,9 +14,11 @@
//===---------------------------------------------------------------------===//
#include "index/Index.h"
+#include "index/Merge.h"
#include "index/SymbolCollector.h"
#include "index/SymbolYAML.h"
#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/CompilerInstance.h"
#include "clang/Index/IndexDataConsumer.h"
#include "clang/Index/IndexingAction.h"
#include "clang/Tooling/CommonOptionsParser.h"
@@ -34,6 +36,7 @@
namespace clang {
namespace clangd {
+namespace {
static llvm::cl::opt<std::string> AssumedHeaderDir(
"assume-header-dir",
@@ -60,6 +63,12 @@
index::createIndexingAction(C, Opts, nullptr)),
Ctx(Ctx), Collector(C) {}
+ // XXX this is just to make running the tool fast during dev!
+ bool BeginInvocation(CompilerInstance &CI) override {
+ const auto &Inputs = CI.getInvocation().getFrontendOpts().Inputs;
+ return !Inputs.empty() && Inputs[0].getFile().contains("clangd/index");
+ }
+
void EndSourceFileAction() override {
WrapperFrontendAction::EndSourceFileAction();
@@ -91,6 +100,25 @@
tooling::ExecutionContext *Ctx;
};
+// Combine occurrences of the same symbol across translation units.
+SymbolSlab mergeSymbols(tooling::ToolResults *Results) {
+ SymbolSlab::Builder UniqueSymbols;
+ llvm::BumpPtrAllocator Arena;
+ Symbol::Details Scratch;
+ Results->forEachResult([&](llvm::StringRef Key, llvm::StringRef Value) {
+ Arena.Reset();
+ auto Sym = clang::clangd::SymbolFromYAML(Value, Arena);
+ clang::clangd::SymbolID ID;
+ Key >> ID;
+ if (const auto *Existing = UniqueSymbols.find(ID))
+ UniqueSymbols.insert(mergeSymbol(*Existing, Sym, &Scratch));
+ else
+ UniqueSymbols.insert(Sym);
+ });
+ return std::move(UniqueSymbols).build();
+}
+
+} // namespace
} // namespace clangd
} // namespace clang
@@ -115,29 +143,20 @@
return 1;
}
+ // Map phase: emit symbols found in each translation unit.
auto Err = Executor->get()->execute(
llvm::make_unique<clang::clangd::SymbolIndexActionFactory>(
Executor->get()->getExecutionContext()));
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
}
- // Deduplicate the result by key and keep the longest value.
- // FIXME(ioeric): Merge occurrences, rather than just dropping all but one.
- // Definitions and forward declarations have the same key and may both have
- // information. Usage count will need to be aggregated across occurrences,
- // too.
- llvm::StringMap<llvm::StringRef> UniqueSymbols;
- Executor->get()->getToolResults()->forEachResult(
- [&UniqueSymbols](llvm::StringRef Key, llvm::StringRef Value) {
- auto Ret = UniqueSymbols.try_emplace(Key, Value);
- if (!Ret.second) {
- // If key already exists, keep the longest value.
- llvm::StringRef &V = Ret.first->second;
- V = V.size() < Value.size() ? Value : V;
- }
- });
+ // Reduce phase: combine symbols using the ID as a key.
+ auto UniqueSymbols =
+ clang::clangd::mergeSymbols(Executor->get()->getToolResults());
+
+ // Output phase: emit YAML for result symbols.
for (const auto &Sym : UniqueSymbols)
- llvm::outs() << Sym.second;
+ llvm::outs() << SymbolToYAML(Sym);
return 0;
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits