ioeric created this revision.
ioeric added a reviewer: sammccall.
Herald added subscribers: cfe-commits, jkorous, MaskRay, ilya-biryukov.

Also move unittest: URI scheme to TestFS so that it can be shared by
different tests.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D47935

Files:
  clangd/ClangdServer.cpp
  clangd/ClangdServer.h
  clangd/CodeComplete.cpp
  clangd/Quality.cpp
  clangd/Quality.h
  clangd/index/FileIndex.cpp
  clangd/index/FileIndex.h
  unittests/clangd/ClangdTests.cpp
  unittests/clangd/FileIndexTests.cpp
  unittests/clangd/QualityTests.cpp
  unittests/clangd/TestFS.cpp
  unittests/clangd/URITests.cpp

Index: unittests/clangd/URITests.cpp
===================================================================
--- unittests/clangd/URITests.cpp
+++ unittests/clangd/URITests.cpp
@@ -14,46 +14,20 @@
 
 namespace clang {
 namespace clangd {
+
+// Force the unittest URI scheme to be linked,
+extern volatile int UnittestSchemeAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest =
+    UnittestSchemeAnchorSource;
+
 namespace {
 
 using ::testing::AllOf;
 
 MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
 MATCHER_P(Authority, A, "") { return arg.authority() == A; }
 MATCHER_P(Body, B, "") { return arg.body() == B; }
 
-// Assume all files in the schema have a "test-root/" root directory, and the
-// schema path is the relative path to the root directory.
-// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z".
-class TestScheme : public URIScheme {
-public:
-  static const char *Scheme;
-
-  static const char *TestRoot;
-
-  llvm::Expected<std::string>
-  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
-                  llvm::StringRef HintPath) const override {
-    auto Pos = HintPath.find(TestRoot);
-    assert(Pos != llvm::StringRef::npos);
-    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
-        .str();
-  }
-
-  llvm::Expected<URI>
-  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
-    auto Pos = AbsolutePath.find(TestRoot);
-    assert(Pos != llvm::StringRef::npos);
-    return URI(Scheme, /*Authority=*/"",
-               AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
-  }
-};
-
-const char *TestScheme::Scheme = "unittest";
-const char *TestScheme::TestRoot = "/test-root/";
-
-static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
-
 std::string createOrDie(llvm::StringRef AbsolutePath,
                         llvm::StringRef Scheme = "file") {
   auto Uri = URI::create(AbsolutePath, Scheme);
Index: unittests/clangd/TestFS.cpp
===================================================================
--- unittests/clangd/TestFS.cpp
+++ unittests/clangd/TestFS.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "TestFS.h"
+#include "URI.h"
 #include "llvm/Support/Errc.h"
 #include "gtest/gtest.h"
 
@@ -62,5 +63,50 @@
   return Path.str();
 }
 
+// Assume all files in the schema have a "test-root/" root directory, and the
+// schema path is the relative path to the root directory.
+// So the schema of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  static const char *TestRoot;
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const override {
+    auto Pos = HintPath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
+        .str();
+  }
+
+  llvm::Expected<URI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    auto Pos = AbsolutePath.find(TestRoot);
+    if (Pos == llvm::StringRef::npos)
+      return llvm::make_error<llvm::StringError>(
+          llvm::Twine("Directory ") + TestRoot + " not found in path " +
+              AbsolutePath,
+          llvm::inconvertibleErrorCode());
+
+    return URI(Scheme, /*Authority=*/"",
+               AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
+  }
+};
+
+const char *TestScheme::Scheme = "unittest";
+#ifdef _WIN32
+const char *TestScheme::TestRoot = "\\test-root\\";
+#else
+const char *TestScheme::TestRoot = "/test-root/";
+#endif
+
+static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the plugin.
+volatile int UnittestSchemeAnchorSource = 0;
+
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/QualityTests.cpp
===================================================================
--- unittests/clangd/QualityTests.cpp
+++ unittests/clangd/QualityTests.cpp
@@ -18,12 +18,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "Quality.h"
+#include "TestFS.h"
 #include "TestTU.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
 namespace clangd {
+
+// Force the unittest URI scheme to be linked,
+extern volatile int UnittestSchemeAnchorSource;
+static int LLVM_ATTRIBUTE_UNUSED UnittestSchemeAnchorDest =
+    UnittestSchemeAnchorSource;
+
 namespace {
 
 TEST(QualityTests, SymbolQualitySignalExtraction) {
@@ -160,6 +167,84 @@
   EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
 }
 
+// {a,b,c} becomes /clangd-test/a/b/c
+std::string joinPaths(llvm::ArrayRef<StringRef> Parts) {
+  return testPath(
+      llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator()));
+}
+
+Symbol symWithDeclURI(StringRef Path, std::string &Storage,
+                      StringRef Scheme = "file") {
+  Symbol Sym;
+  auto U = URI::create(Path, Scheme);
+  EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
+  Storage =  U->toString();
+  Sym.CanonicalDeclaration.FileURI = Storage;
+  return Sym;
+}
+
+TEST(QualityTests, IndexSymbolProximityScores) {
+  SymbolRelevanceSignals Relevance;
+  std::string Path = joinPaths({"a", "b", "c", "x"});
+  Relevance.ProximityPath = Path;
+
+  std::string Storage;
+
+  Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "x"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1.0);
+
+  Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.9);
+
+  Relevance.merge(symWithDeclURI(joinPaths({"a", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.80);
+
+  Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "c", "d", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.85);
+
+  Relevance.merge(
+      symWithDeclURI(joinPaths({"a", "b", "m", "n", "o", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.4);
+
+  Relevance.merge(
+      symWithDeclURI(joinPaths({"a", "t", "m", "n", "o", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2);
+
+  // Note the common directory is /clang-test/
+  Relevance.merge(symWithDeclURI(joinPaths({"m", "n", "o", "y"}), Storage));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.2);
+
+  // Different URI schemes.
+  Relevance.merge(symWithDeclURI(joinPaths({"a", "b", "test-root", "a", "y"}),
+                                 Storage,
+                                 /*Scheme=*/"unittest"));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0);
+}
+
+TEST(QualityTests, IndexSymbolProximityScoresWithTestURI) {
+  SymbolRelevanceSignals Relevance;
+  std::string Path = joinPaths({"a", "test-root", "b", "c", "x"});
+  Relevance.ProximityPath = Path;
+
+  std::string Storage;
+
+  Relevance.merge(
+      symWithDeclURI(joinPaths({"remote", "test-root", "b", "c", "x"}), Storage,
+                     /*Scheme=*/"unittest"));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 1);
+
+  Relevance.merge(
+      symWithDeclURI(joinPaths({"remote", "test-root", "y"}), Storage,
+                     /*Scheme=*/"unittest"));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.8);
+
+  // unittest:///b/c/x vs unittest:///m/n/y. No common directory.
+  Relevance.merge(
+      symWithDeclURI(joinPaths({"remote", "test-root", "m", "n", "y"}), Storage,
+                     /*Scheme=*/"unittest"));
+  EXPECT_FLOAT_EQ(Relevance.ProximityScore, 0.0);
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: unittests/clangd/FileIndexTests.cpp
===================================================================
--- unittests/clangd/FileIndexTests.cpp
+++ unittests/clangd/FileIndexTests.cpp
@@ -96,6 +96,42 @@
   M.update(File.Filename, &AST.getASTContext(), AST.getPreprocessorPtr());
 }
 
+class TestURIScheme : public URIScheme {
+public:
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef /*HintPath*/) const override {
+    llvm_unreachable("ClangdTests never makes absolute path.");
+  }
+
+  llvm::Expected<URI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    return URI::parse(HardcodedURI);
+  }
+  static const char HardcodedURI[];
+  static const char Scheme[];
+};
+
+const char TestURIScheme::HardcodedURI[] = "ClangdTests:///test";
+const char TestURIScheme::Scheme[] = "ClangdTest";
+
+static URISchemeRegistry::Add<TestURIScheme> X(TestURIScheme::Scheme,
+                                               "Test scheme for ClangdTests.");
+
+TEST(FileIndexTest, CustomizedURIScheme) {
+  FileIndex M({TestURIScheme::Scheme});
+  update(M, "f", "class string {};");
+
+  FuzzyFindRequest Req;
+  Req.Query = "";
+  bool SeenSymbol = false;
+  M.fuzzyFind(Req, [&](const Symbol &Sym) {
+    EXPECT_EQ(Sym.CanonicalDeclaration.FileURI, TestURIScheme::HardcodedURI);
+    SeenSymbol = true;
+  });
+  EXPECT_TRUE(SeenSymbol);
+}
+
 TEST(FileIndexTest, IndexAST) {
   FileIndex M;
   update(M, "f1", "namespace ns { void f() {} class X {}; }");
Index: unittests/clangd/ClangdTests.cpp
===================================================================
--- unittests/clangd/ClangdTests.cpp
+++ unittests/clangd/ClangdTests.cpp
@@ -152,28 +152,6 @@
   }
 };
 
-constexpr const char* ClangdTestScheme = "ClangdTests";
-class TestURIScheme : public URIScheme {
-public:
-  llvm::Expected<std::string>
-  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
-                  llvm::StringRef /*HintPath*/) const override {
-    llvm_unreachable("ClangdTests never makes absolute path.");
-  }
-
-  llvm::Expected<URI>
-  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
-    llvm_unreachable("ClangdTest never creates a test URI.");
-  }
-
-  llvm::Expected<std::string> getIncludeSpelling(const URI &U) const override {
-    return ("\"" + U.body().trim("/") + "\"").str();
-  }
-};
-
-static URISchemeRegistry::Add<TestURIScheme>
-    X(ClangdTestScheme, "Test scheme for ClangdTests.");
-
 TEST_F(ClangdVFSTest, Parse) {
   // FIXME: figure out a stable format for AST dumps, so that we can check the
   // output of the dump itself is equal to the expected one, not just that it's
Index: clangd/index/FileIndex.h
===================================================================
--- clangd/index/FileIndex.h
+++ clangd/index/FileIndex.h
@@ -56,6 +56,10 @@
 /// \brief This manages symbls from files and an in-memory index on all symbols.
 class FileIndex : public SymbolIndex {
 public:
+  /// If URISchemes is empty, the default schemes in SymbolCollector will be
+  /// used.
+  FileIndex(std::vector<std::string> URISchemes = {});
+
   /// \brief Update symbols in \p Path with symbols in \p AST. If \p AST is
   /// nullptr, this removes all symbols in the file.
   /// If \p AST is not null, \p PP cannot be null and it should be the
@@ -72,11 +76,14 @@
 private:
   FileSymbols FSymbols;
   MemIndex Index;
+  std::vector<std::string> URISchemes;
 };
 
 /// Retrieves namespace and class level symbols in \p AST.
 /// Exposed to assist in unit tests.
-SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP);
+/// If URISchemes is empty, the default schemes in SymbolCollector will be used.
+SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
+                    llvm::ArrayRef<std::string> URISchemes = {});
 
 } // namespace clangd
 } // namespace clang
Index: clangd/index/FileIndex.cpp
===================================================================
--- clangd/index/FileIndex.cpp
+++ clangd/index/FileIndex.cpp
@@ -15,15 +15,18 @@
 namespace clang {
 namespace clangd {
 
-SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP) {
+SymbolSlab indexAST(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
+                    llvm::ArrayRef<std::string> URISchemes) {
   SymbolCollector::Options CollectorOpts;
   // FIXME(ioeric): we might also want to collect include headers. We would need
   // to make sure all includes are canonicalized (with CanonicalIncludes), which
   // is not trivial given the current way of collecting symbols: we only have
   // AST at this point, but we also need preprocessor callbacks (e.g.
   // CommentHandler for IWYU pragma) to canonicalize includes.
   CollectorOpts.CollectIncludePath = false;
   CollectorOpts.CountReferences = false;
+  if (!URISchemes.empty())
+    CollectorOpts.URISchemes = URISchemes;
 
   SymbolCollector Collector(std::move(CollectorOpts));
   Collector.setPreprocessor(PP);
@@ -41,6 +44,9 @@
   return Collector.takeSymbols();
 }
 
+FileIndex::FileIndex(std::vector<std::string> URISchemes)
+    : URISchemes(std::move(URISchemes)) {}
+
 void FileSymbols::update(PathRef Path, std::unique_ptr<SymbolSlab> Slab) {
   std::lock_guard<std::mutex> Lock(Mutex);
   if (!Slab)
@@ -79,7 +85,7 @@
   } else {
     assert(PP);
     auto Slab = llvm::make_unique<SymbolSlab>();
-    *Slab = indexAST(*AST, PP);
+    *Slab = indexAST(*AST, PP, URISchemes);
     FSymbols.update(Path, std::move(Slab));
   }
   auto Symbols = FSymbols.allSymbols();
Index: clangd/Quality.h
===================================================================
--- clangd/Quality.h
+++ clangd/Quality.h
@@ -70,6 +70,8 @@
   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
   float NameMatch = 1;
   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
+  // If set, this is used to compute proximity from symbol's declaring file.
+  llvm::StringRef ProximityPath;
   /// Proximity between best declaration and the query. [0-1], 1 is closest.
   float ProximityScore = 0;
 
Index: clangd/Quality.cpp
===================================================================
--- clangd/Quality.cpp
+++ clangd/Quality.cpp
@@ -7,6 +7,7 @@
 //
 //===---------------------------------------------------------------------===//
 #include "Quality.h"
+#include "URI.h"
 #include "index/Index.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclVisitor.h"
@@ -163,9 +164,52 @@
   return SymbolRelevanceSignals::GlobalScope;
 }
 
+/// Calculates a proximity score between two URI strings that have the same
+/// scheme. This does not parse URI. A URI (sans "<scheme>:") is split into
+/// chunks by '/' and each chunk is considered a file/directory. For example,
+/// "uri:///a/b/c" will be treated as /a/b/c
+static float uriProximity(StringRef UX, StringRef UY) {
+  auto SchemeSplitX = UX.split(':');
+  auto SchemeSplitY = UY.split(':');
+  assert((SchemeSplitX.first == SchemeSplitY.first) &&
+         "URIs must have the same scheme in order to compute proximity.");
+  auto Split = [](StringRef URIWithoutScheme) {
+    SmallVector<StringRef, 8> Split;
+    URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    return Split;
+  };
+  SmallVector<StringRef, 8> Xs = Split(SchemeSplitX.second);
+  SmallVector<StringRef, 8> Ys = Split(SchemeSplitY.second);
+  auto X = Xs.begin(), Y = Ys.begin(), XE = Xs.end(), YE = Ys.end();
+  for (; X != XE && Y != YE && *X == *Y; ++X, ++Y) {
+  }
+  auto Dist = (XE - X) + (YE - Y);
+  // Either UX and UY are in the same directory or one is in an ancestor
+  // directory of the other.
+  // 1.0 same file, 0.9 in the same directory; -0.05 for each directory away.
+  if (X >= (XE - 1) || Y >= (YE - 1))
+    return 1.0 - Dist * 0.05;
+  if (X == Xs.begin() && Y == Ys.begin()) // No common directory.
+    return 0;
+  return std::max(0.0, 1.0 - 0.1 * Dist);
+}
+
 void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
   // FIXME: Index results always assumed to be at global scope. If Scope becomes
   // relevant to non-completion requests, we should recognize class members etc.
+
+  ProximityScore = 0.0;
+  StringRef SymURI = IndexResult.CanonicalDeclaration.FileURI;
+  if (!ProximityPath.empty() && !SymURI.empty()) {
+    // Only calculate proximity score for two URIs with the same scheme so that
+    // the computation can be purely text-based and thus avoid expensive URI
+    // encoding/decoding.
+    if (auto U = URI::create(ProximityPath, SymURI.split(':').first)) {
+      ProximityScore += uriProximity(SymURI, U->toString());
+    } else {
+      llvm::consumeError(U.takeError());
+    }
+  }
 }
 
 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -1026,6 +1026,7 @@
 
     SymbolQualitySignals Quality;
     SymbolRelevanceSignals Relevance;
+    Relevance.ProximityPath = FileName;
     Relevance.Query = SymbolRelevanceSignals::CodeComplete;
     if (auto FuzzyScore = Filter->match(C.Name))
       Relevance.NameMatch = *FuzzyScore;
Index: clangd/ClangdServer.h
===================================================================
--- clangd/ClangdServer.h
+++ clangd/ClangdServer.h
@@ -80,6 +80,10 @@
     /// opened files and uses the index to augment code completion results.
     bool BuildDynamicSymbolIndex = false;
 
+    /// URI schemes to use when building the dynamic index.
+    /// If empty, the default schemes in SymbolCollector will be used.
+    std::vector<std::string> URISchemes;
+
     /// If set, use this index to augment code completion results.
     SymbolIndex *StaticIndex = nullptr;
 
Index: clangd/ClangdServer.cpp
===================================================================
--- clangd/ClangdServer.cpp
+++ clangd/ClangdServer.cpp
@@ -86,7 +86,8 @@
     : CompileArgs(CDB, Opts.ResourceDir ? Opts.ResourceDir->str()
                                         : getStandardResourceDir()),
       DiagConsumer(DiagConsumer), FSProvider(FSProvider),
-      FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex() : nullptr),
+      FileIdx(Opts.BuildDynamicSymbolIndex ? new FileIndex(Opts.URISchemes)
+                                           : nullptr),
       PCHs(std::make_shared<PCHContainerOperations>()),
       // Pass a callback into `WorkScheduler` to extract symbols from a newly
       // parsed file and rebuild the file index synchronously each time an AST
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to