ioeric created this revision. ioeric added a reviewer: sammccall. Herald added subscribers: cfe-commits, jkorous, MaskRay, ilya-biryukov.
Also move unittest: URI scheme to TestFS so that it can be shared by different tests. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D47935 Files: clangd/ClangdServer.cpp clangd/ClangdServer.h clangd/CodeComplete.cpp clangd/Quality.cpp clangd/Quality.h clangd/index/FileIndex.cpp clangd/index/FileIndex.h unittests/clangd/ClangdTests.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/QualityTests.cpp unittests/clangd/TestFS.cpp unittests/clangd/URITests.cpp
Index: unittests/clangd/URITests.cpp =================================================================== --- unittests/clangd/URITests.cpp +++ unittests/clangd/URITests.cpp @@ -14,46 +14,20 @@ namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +extern volatile int UnittestSchemeAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { using ::testing::AllOf; MATCHER_P(Scheme, S, "") { return arg.scheme() == S; } MATCHER_P(Authority, A, "") { return arg.authority() == A; } MATCHER_P(Body, B, "") { return arg.body() == B; } -// Assume all files in the schema have a "test-root/" root directory, and the -// schema path is the relative path to the root directory. -// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z". -class TestScheme : public URIScheme { -public: - static const char *Scheme; - - static const char *TestRoot; - - llvm::Expected<std::string> - getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, - llvm::StringRef HintPath) const override { - auto Pos = HintPath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body) - .str(); - } - - llvm::Expected<URI> - uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { - auto Pos = AbsolutePath.find(TestRoot); - assert(Pos != llvm::StringRef::npos); - return URI(Scheme, /*Authority=*/"", - AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size())); - } -}; - -const char *TestScheme::Scheme = "unittest"; -const char *TestScheme::TestRoot = "/test-root/"; - -static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema"); - std::string createOrDie(llvm::StringRef AbsolutePath, llvm::StringRef Scheme = "file") { auto Uri = URI::create(AbsolutePath, Scheme); Index: unittests/clangd/TestFS.cpp =================================================================== --- unittests/clangd/TestFS.cpp +++ unittests/clangd/TestFS.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// #include "TestFS.h" +#include "URI.h" #include "llvm/Support/Errc.h" #include "gtest/gtest.h" @@ -62,5 +63,50 @@ return Path.str(); } +// Assume all files in the schema have a "test-root/" root directory, and the +// schema path is the relative path to the root directory. +// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z". +class TestScheme : public URIScheme { +public: + static const char *Scheme; + + static const char *TestRoot; + + llvm::Expected<std::string> + getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, + llvm::StringRef HintPath) const override { + auto Pos = HintPath.find(TestRoot); + assert(Pos != llvm::StringRef::npos); + return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body) + .str(); + } + + llvm::Expected<URI> + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + auto Pos = AbsolutePath.find(TestRoot); + if (Pos == llvm::StringRef::npos) + return llvm::make_error<llvm::StringError>( + llvm::Twine("Directory ") + TestRoot + " not found in path " + + AbsolutePath, + llvm::inconvertibleErrorCode()); + + return URI(Scheme, /*Authority=*/"", + AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size())); + } +}; + +const char *TestScheme::Scheme = "unittest"; +#ifdef _WIN32 +const char *TestScheme::TestRoot = "\\test-root\\"; +#else +const char *TestScheme::TestRoot = "/test-root/"; +#endif + +static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema"); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the plugin. +volatile int UnittestSchemeAnchorSource = 0; + } // namespace clangd } // namespace clang Index: unittests/clangd/QualityTests.cpp =================================================================== --- unittests/clangd/QualityTests.cpp +++ unittests/clangd/QualityTests.cpp @@ -18,12 +18,19 @@ //===----------------------------------------------------------------------===// #include "Quality.h" +#include "TestFS.h" #include "TestTU.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { namespace clangd { + +// Force the unittest URI scheme to be linked, +extern volatile int UnittestSchemeAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest = + UnittestSchemeAnchorSource; + namespace { TEST(QualityTests, SymbolQualitySignalExtraction) { @@ -160,6 +167,84 @@ EXPECT_LT(sortText(0, "a"), sortText(0, "z")); } +// {a,b,c} becomes /clangd-test/a/b/c +std::string joinPaths(llvm::ArrayRef<StringRef> Parts) { + return testPath( + llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator())); +} + +Symbol symWithDeclURI(StringRef Path, std::string &Storage, + StringRef Scheme = "file") { + Symbol Sym; + auto U = URI::create(Path, Scheme); + EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError()); + Storage = U->toString(); + Sym.CanonicalDeclaration.FileURI = Storage; + return Sym; +} + +TEST(QualityTests, IndexSymbolProximityScores) { + SymbolRelevanceSignals Relevance; + std::string Path = joinPaths({"a", "b", "c", "x"}); + Relevance.ProximityPath = Path; + + std::string Storage; + + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "x"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0); + + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.9); + + Relevance.merge(symWithDeclURI(joinPaths({"a", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.80); + + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "d", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.85); + + Relevance.merge( + symWithDeclURI(joinPaths({"a", "b", "m", "n", "o", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.4); + + Relevance.merge( + symWithDeclURI(joinPaths({"a", "t", "m", "n", "o", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2); + + // Note the common directory is /clang-test/ + Relevance.merge(symWithDeclURI(joinPaths({"m", "n", "o", "y"}), Storage)); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2); + + // Different URI schemes. + Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "test-root", "a", "y"}), + Storage, + /*Scheme=*/"unittest")); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0); +} + +TEST(QualityTests, IndexSymbolProximityScoresWithTestURI) { + SymbolRelevanceSignals Relevance; + std::string Path = joinPaths({"a", "test-root", "b", "c", "x"}); + Relevance.ProximityPath = Path; + + std::string Storage; + + Relevance.merge( + symWithDeclURI(joinPaths({"remote", "test-root", "b", "c", "x"}), Storage, + /*Scheme=*/"unittest")); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1); + + Relevance.merge( + symWithDeclURI(joinPaths({"remote", "test-root", "y"}), Storage, + /*Scheme=*/"unittest")); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.8); + + // unittest:///b/c/x vs unittest:///m/n/y. No common directory. + Relevance.merge( + symWithDeclURI(joinPaths({"remote", "test-root", "m", "n", "y"}), Storage, + /*Scheme=*/"unittest")); + EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0); +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/FileIndexTests.cpp =================================================================== --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -96,6 +96,42 @@ M.update(File.Filename, &AST.getASTContext(), AST.getPreprocessorPtr()); } +class TestURIScheme : public URIScheme { +public: + llvm::Expected<std::string> + getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, + llvm::StringRef /*HintPath*/) const override { + llvm_unreachable("ClangdTests never makes absolute path."); + } + + llvm::Expected<URI> + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + return URI::parse(HardcodedURI); + } + static const char HardcodedURI[]; + static const char Scheme[]; +}; + +const char TestURIScheme::HardcodedURI[] = "ClangdTests:///test"; +const char TestURIScheme::Scheme[] = "ClangdTest"; + +static URISchemeRegistry::Add<TestURIScheme> X(TestURIScheme::Scheme, + "Test scheme for ClangdTests."); + +TEST(FileIndexTest, CustomizedURIScheme) { + FileIndex M({TestURIScheme::Scheme}); + update(M, "f", "class string {};"); + + FuzzyFindRequest Req; + Req.Query = ""; + bool SeenSymbol = false; + M.fuzzyFind(Req, [&](const Symbol &Sym) { + EXPECT_EQ(Sym.CanonicalDeclaration.FileURI, TestURIScheme::HardcodedURI); + SeenSymbol = true; + }); + EXPECT_TRUE(SeenSymbol); +} + TEST(FileIndexTest, IndexAST) { FileIndex M; update(M, "f1", "namespace ns { void f() {} class X {}; }"); Index: unittests/clangd/ClangdTests.cpp =================================================================== --- unittests/clangd/ClangdTests.cpp +++ unittests/clangd/ClangdTests.cpp @@ -152,28 +152,6 @@ } }; -constexpr const char* ClangdTestScheme = "ClangdTests"; -class TestURIScheme : public URIScheme { -public: - llvm::Expected<std::string> - getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body, - llvm::StringRef /*HintPath*/) const override { - llvm_unreachable("ClangdTests never makes absolute path."); - } - - llvm::Expected<URI> - uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { - llvm_unreachable("ClangdTest never creates a test URI."); - } - - llvm::Expected<std::string> getIncludeSpelling(const URI &U) const override { - return ("\"" + U.body().trim("/") + "\"").str(); - } -}; - -static URISchemeRegistry::Add<TestURIScheme> - X(ClangdTestScheme, "Test scheme for ClangdTests."); - TEST_F(ClangdVFSTest, Parse) { // FIXME: figure out a stable format for AST dumps, so that we can check the // output of the dump itself is equal to the expected one, not just that it's Index: clangd/index/FileIndex.h =================================================================== --- clangd/index/FileIndex.h +++ clangd/index/FileIndex.h @@ -56,6 +56,10 @@ /// \brief This manages symbls from files and an in-memory index on all symbols. class FileIndex : public SymbolIndex { public: + /// If URISchemes is empty, the default schemes in SymbolCollector will be + /// used. + FileIndex(std::vector<std::string> URISchemes = {}); + /// \brief Update symbols in \p Path with symbols in \p AST. If \p AST is /// nullptr, this removes all symbols in the file. /// If \p AST is not null, \p PP cannot be null and it should be the @@ -72,11 +76,14 @@ private: FileSymbols FSymbols; MemIndex Index; + std::vector<std::string> URISchemes; }; /// Retrieves namespace and class level symbols in \p AST. /// Exposed to assist in unit tests. -SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP); +/// If URISchemes is empty, the default schemes in SymbolCollector will be used. +SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP, + llvm::ArrayRef<std::string> URISchemes = {}); } // namespace clangd } // namespace clang Index: clangd/index/FileIndex.cpp =================================================================== --- clangd/index/FileIndex.cpp +++ clangd/index/FileIndex.cpp @@ -15,15 +15,18 @@ namespace clang { namespace clangd { -SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP) { +SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP, + llvm::ArrayRef<std::string> URISchemes) { SymbolCollector::Options CollectorOpts; // FIXME(ioeric): we might also want to collect include headers. We would need // to make sure all includes are canonicalized (with CanonicalIncludes), which // is not trivial given the current way of collecting symbols: we only have // AST at this point, but we also need preprocessor callbacks (e.g. // CommentHandler for IWYU pragma) to canonicalize includes. CollectorOpts.CollectIncludePath = false; CollectorOpts.CountReferences = false; + if (!URISchemes.empty()) + CollectorOpts.URISchemes = URISchemes; SymbolCollector Collector(std::move(CollectorOpts)); Collector.setPreprocessor(PP); @@ -41,6 +44,9 @@ return Collector.takeSymbols(); } +FileIndex::FileIndex(std::vector<std::string> URISchemes) + : URISchemes(std::move(URISchemes)) {} + void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Slab) { std::lock_guard<std::mutex> Lock(Mutex); if (!Slab) @@ -79,7 +85,7 @@ } else { assert(PP); auto Slab = llvm::make_unique<SymbolSlab>(); - *Slab = indexAST(*AST, PP); + *Slab = indexAST(*AST, PP, URISchemes); FSymbols.update(Path, std::move(Slab)); } auto Symbols = FSymbols.allSymbols(); Index: clangd/Quality.h =================================================================== --- clangd/Quality.h +++ clangd/Quality.h @@ -70,6 +70,8 @@ /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. float NameMatch = 1; bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). + // If set, this is used to compute proximity from symbol's declaring file. + llvm::StringRef ProximityPath; /// Proximity between best declaration and the query. [0-1], 1 is closest. float ProximityScore = 0; Index: clangd/Quality.cpp =================================================================== --- clangd/Quality.cpp +++ clangd/Quality.cpp @@ -7,6 +7,7 @@ // //===---------------------------------------------------------------------===// #include "Quality.h" +#include "URI.h" #include "index/Index.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclVisitor.h" @@ -163,9 +164,52 @@ return SymbolRelevanceSignals::GlobalScope; } +/// Calculates a proximity score between two URI strings that have the same +/// scheme. This does not parse URI. A URI (sans "<scheme>:") is split into +/// chunks by '/' and each chunk is considered a file/directory. For example, +/// "uri:///a/b/c" will be treated as /a/b/c +static float uriProximity(StringRef UX, StringRef UY) { + auto SchemeSplitX = UX.split(':'); + auto SchemeSplitY = UY.split(':'); + assert((SchemeSplitX.first == SchemeSplitY.first) && + "URIs must have the same scheme in order to compute proximity."); + auto Split = [](StringRef URIWithoutScheme) { + SmallVector<StringRef, 8> Split; + URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + return Split; + }; + SmallVector<StringRef, 8> Xs = Split(SchemeSplitX.second); + SmallVector<StringRef, 8> Ys = Split(SchemeSplitY.second); + auto X = Xs.begin(), Y = Ys.begin(), XE = Xs.end(), YE = Ys.end(); + for (; X != XE && Y != YE && *X == *Y; ++X, ++Y) { + } + auto Dist = (XE - X) + (YE - Y); + // Either UX and UY are in the same directory or one is in an ancestor + // directory of the other. + // 1.0 same file, 0.9 in the same directory; -0.05 for each directory away. + if (X >= (XE - 1) || Y >= (YE - 1)) + return 1.0 - Dist * 0.05; + if (X == Xs.begin() && Y == Ys.begin()) // No common directory. + return 0; + return std::max(0.0, 1.0 - 0.1 * Dist); +} + void SymbolRelevanceSignals::merge(const Symbol &IndexResult) { // FIXME: Index results always assumed to be at global scope. If Scope becomes // relevant to non-completion requests, we should recognize class members etc. + + ProximityScore = 0.0; + StringRef SymURI = IndexResult.CanonicalDeclaration.FileURI; + if (!ProximityPath.empty() && !SymURI.empty()) { + // Only calculate proximity score for two URIs with the same scheme so that + // the computation can be purely text-based and thus avoid expensive URI + // encoding/decoding. + if (auto U = URI::create(ProximityPath, SymURI.split(':').first)) { + ProximityScore += uriProximity(SymURI, U->toString()); + } else { + llvm::consumeError(U.takeError()); + } + } } void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) { Index: clangd/CodeComplete.cpp =================================================================== --- clangd/CodeComplete.cpp +++ clangd/CodeComplete.cpp @@ -1026,6 +1026,7 @@ SymbolQualitySignals Quality; SymbolRelevanceSignals Relevance; + Relevance.ProximityPath = FileName; Relevance.Query = SymbolRelevanceSignals::CodeComplete; if (auto FuzzyScore = Filter->match(C.Name)) Relevance.NameMatch = *FuzzyScore; Index: clangd/ClangdServer.h =================================================================== --- clangd/ClangdServer.h +++ clangd/ClangdServer.h @@ -80,6 +80,10 @@ /// opened files and uses the index to augment code completion results. bool BuildDynamicSymbolIndex = false; + /// URI schemes to use when building the dynamic index. + /// If empty, the default schemes in SymbolCollector will be used. + std::vector<std::string> URISchemes; + /// If set, use this index to augment code completion results. SymbolIndex *StaticIndex = nullptr; Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -86,7 +86,8 @@ : CompileArgs(CDB, Opts.ResourceDir ? Opts.ResourceDir->str() : getStandardResourceDir()), DiagConsumer(DiagConsumer), FSProvider(FSProvider), - FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex() : nullptr), + FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.URISchemes) + : nullptr), PCHs(std::make_shared<PCHContainerOperations>()), // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits