nridge updated this revision to Diff 248630.
nridge marked 16 inline comments as done.
nridge added a comment.

Rebase onto D75479 <https://reviews.llvm.org/D75479> and address most review 
comments

Comments remaining to be addressed:

- revising the tests to exercise locateSymbolNamedTextuallyAt() directly
- comments related to fuzzy matching (I have an outstanding question about that)

Handling of dependent code has been deferred to a follow-up change


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72874/new/

https://reviews.llvm.org/D72874

Files:
  clang-tools-extra/clangd/FindSymbols.cpp
  clang-tools-extra/clangd/FindSymbols.h
  clang-tools-extra/clangd/XRefs.cpp
  clang-tools-extra/clangd/XRefs.h
  clang-tools-extra/clangd/unittests/XRefsTests.cpp

Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -596,6 +596,44 @@
   }
 }
 
+TEST(LocateSymbol, Textual) {
+  const char *Tests[] = {
+      R"cpp(// Comment
+        struct [[MyClass]] {};
+        // Comment mentioning M^yClass
+      )cpp",
+      R"cpp(// String
+        struct [[MyClass]] {};
+        const char* s = "String literal mentioning M^yClass";
+      )cpp",
+      R"cpp(// Ifdef'ed out code
+        struct [[MyClass]] {};
+        #ifdef WALDO
+          M^yClass var;
+        #endif
+      )cpp"};
+
+  for (const char *Test : Tests) {
+    Annotations T(Test);
+    llvm::Optional<Range> WantDecl;
+    if (!T.ranges().empty())
+      WantDecl = T.range();
+
+    auto TU = TestTU::withCode(T.code());
+
+    auto AST = TU.build();
+    auto Index = TU.index();
+    auto Results = locateSymbolAt(AST, T.point(), Index.get());
+
+    if (!WantDecl) {
+      EXPECT_THAT(Results, IsEmpty()) << Test;
+    } else {
+      ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test;
+      EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test;
+    }
+  }
+}
+
 TEST(LocateSymbol, Ambiguous) {
   auto T = Annotations(R"cpp(
     struct Foo {
@@ -670,6 +708,26 @@
                                    Sym("baz", T.range("StaticOverload2"))));
 }
 
+TEST(LocateSymbol, TextualAmbiguous) {
+  auto T = Annotations(R"cpp(
+        struct Foo {
+          void $FooLoc[[uniqueMethodName]]();
+        };
+        struct Bar {
+          void $BarLoc[[uniqueMethodName]]();
+        };
+        // Will call u^niqueMethodName() on t.
+        template <typename T>
+        void f(T t);
+      )cpp");
+  auto TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+  EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()),
+              UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")),
+                                   Sym("uniqueMethodName", T.range("BarLoc"))));
+}
+
 TEST(LocateSymbol, TemplateTypedefs) {
   auto T = Annotations(R"cpp(
     template <class T> struct function {};
@@ -869,37 +927,37 @@
 
 TEST(LocateSymbol, NearbyIdentifier) {
   const char *Tests[] = {
-    R"cpp(
+      R"cpp(
       // regular identifiers (won't trigger)
       int hello;
       int y = he^llo;
     )cpp",
-    R"cpp(
+      R"cpp(
       // disabled preprocessor sections
       int [[hello]];
       #if 0
       int y = ^hello;
       #endif
     )cpp",
-    R"cpp(
+      R"cpp(
       // comments
       // he^llo, world
       int [[hello]];
     )cpp",
-    R"cpp(
+      R"cpp(
       // string literals
       int [[hello]];
       const char* greeting = "h^ello, world";
     )cpp",
 
-    R"cpp(
+      R"cpp(
       // can refer to macro invocations (even if they expand to nothing)
       #define INT int
       [[INT]] x;
       // I^NT
     )cpp",
 
-    R"cpp(
+      R"cpp(
       // prefer nearest occurrence
       int hello;
       int x = hello;
@@ -908,12 +966,12 @@
       int z = hello;
     )cpp",
 
-    R"cpp(
+      R"cpp(
       // short identifiers find near results
       int [[hi]];
       // h^i
     )cpp",
-    R"cpp(
+      R"cpp(
       // short identifiers don't find far results
       int hi;
 
@@ -922,13 +980,13 @@
       // h^i
     )cpp",
   };
-  for (const char* Test : Tests) {
+  for (const char *Test : Tests) {
     Annotations T(Test);
     auto AST = TestTU::withCode(T.code()).build();
     const auto &SM = AST.getSourceManager();
     llvm::Optional<Range> Nearby;
-    if (const auto*Tok = findNearbyIdentifier(
-        cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
+    if (const auto *Tok = findNearbyIdentifier(
+            cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
       Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
                                        Tok->location(), Tok->endLocation()));
     if (T.ranges().empty())
Index: clang-tools-extra/clangd/XRefs.h
===================================================================
--- clang-tools-extra/clangd/XRefs.h
+++ clang-tools-extra/clangd/XRefs.h
@@ -60,6 +60,21 @@
 const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
                                           const syntax::TokenBuffer &TB);
 
+// Tries to provide a textual fallback for locating a symbol referenced at
+// a location, by looking up the word under the cursor as a symbol name in the
+// index. The aim is to pick up references to symbols in contexts where
+// AST-based resolution does not work, such as comments, strings, and PP
+// disabled regions. The implementation takes a number of measures to avoid
+// false positives, such as looking for some signal that the word at the
+// given location is likely to be an identifier. The function does not
+// currently return results for locations that end up as real expanded
+// tokens, although this may be relaxed for e.g. dependent code in the future.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+                             SourceLocation Loc,
+                             const std::string &MainFilePath);
+
 /// Get all document links
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
 
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -10,9 +10,11 @@
 #include "CodeCompletionStrings.h"
 #include "FindSymbols.h"
 #include "FindTarget.h"
+#include "FuzzyMatch.h"
 #include "Logger.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
+#include "Quality.h"
 #include "Selection.h"
 #include "SourceCode.h"
 #include "URI.h"
@@ -49,6 +51,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 namespace clang {
 namespace clangd {
@@ -369,7 +372,7 @@
   // Updates BestTok and BestCost if Tok is a good candidate.
   // May return true if the cost is too high for this token.
   auto Consider = [&](const syntax::Token &Tok) {
-    if(!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
+    if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
       return false;
     // No point guessing the same location we started with.
     if (Tok.location() == WordStart)
@@ -412,6 +415,145 @@
   return BestTok;
 }
 
+static bool isLikelyToBeIdentifier(StringRef Word) {
+  // Word contains underscore.
+  // This handles things like snake_case and MACRO_CASE.
+  if (Word.contains('_')) {
+    return true;
+  }
+  // Word contains capital letter other than at beginning.
+  // This handles things like lowerCamel and UpperCamel.
+  // The check for also containing a lowercase letter is to rule out
+  // initialisms like "HTTP".
+  bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
+  bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
+  if (HasLower && HasUpper) {
+    return true;
+  }
+  // FIXME: There are other signals we could listen for.
+  // Some of these require inspecting the surroundings of the word as well.
+  //   - mid-sentence Capitalization
+  //   - markup like quotes / backticks / brackets / "\p"
+  //   - word has a qualifier (foo::bar)
+  return false;
+}
+
+using ScoredLocatedSymbol = std::pair<float, LocatedSymbol>;
+struct ScoredSymbolGreater {
+  bool operator()(const ScoredLocatedSymbol &L,
+                  const ScoredLocatedSymbol &R) const {
+    return L.first > R.first;
+  }
+};
+
+std::vector<LocatedSymbol>
+locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index,
+                             SourceLocation Loc,
+                             const std::string &MainFilePath) {
+  const auto &SM = AST.getSourceManager();
+  FileID File;
+  unsigned Pos;
+  std::tie(File, Pos) = SM.getDecomposedLoc(Loc);
+  llvm::StringRef Code = SM.getBufferData(File);
+  auto QueryString = wordTouching(Code, Pos);
+  if (!isLikelyToBeIdentifier(QueryString)) {
+    return {};
+  }
+
+  // If this is a real token that survived preprocessing, don't
+  // use the textual heuristic. This is to avoid false positives
+  // when over tokens that happen to correspond to an identifier
+  // name elsewhere.
+  // FIXME: Relax this for dependent code.
+  unsigned WordOffset = QueryString.data() - Code.data();
+  SourceLocation WordStart = SM.getComposedLoc(File, WordOffset);
+  // If this is a real token that survived preprocessing, don't use heuristics.
+  auto WordExpandedTokens =
+      AST.getTokens().expandedTokens(SM.getMacroArgExpandedLocation(WordStart));
+  if (!WordExpandedTokens.empty())
+    return {};
+
+  FuzzyFindRequest Req;
+  Req.Query = QueryString.str();
+  Req.ProximityPaths = {MainFilePath};
+  Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts());
+  // FIXME: For extra strictness, consider AnyScope=false.
+  Req.AnyScope = true;
+  // We limit the results to 3 further below. This limit is to avoid fetching
+  // too much data, while still likely having enough for 3 results to remain
+  // after additional filtering.
+  Req.Limit = 10;
+  TopN<ScoredLocatedSymbol, ScoredSymbolGreater> Top(*Req.Limit);
+  FuzzyMatcher Filter(Req.Query);
+  Index->fuzzyFind(Req, [&](const Symbol &Sym) {
+    auto MaybeDeclLoc =
+        symbolLocationToLocation(Sym.CanonicalDeclaration, MainFilePath);
+    if (!MaybeDeclLoc) {
+      log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError());
+      return;
+    }
+    Location DeclLoc = *MaybeDeclLoc;
+    Location DefLoc;
+    if (Sym.Definition) {
+      auto MaybeDefLoc = symbolLocationToLocation(Sym.Definition, MainFilePath);
+      if (!MaybeDefLoc) {
+        log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError());
+        return;
+      }
+      DefLoc = *MaybeDefLoc;
+    }
+    Location PreferredLoc = bool(Sym.Definition) ? DefLoc : DeclLoc;
+
+    // For now, only consider exact name matches, including case.
+    // This is to avoid too many false positives.
+    // We could relax this in the future if we make the query more accurate
+    // by other means.
+    if (Sym.Name != QueryString)
+      return;
+
+    // Exclude constructor results. They have the same name as the class,
+    // but we don't have enough context to prefer them over the class.
+    if (Sym.SymInfo.Kind == index::SymbolKind::Constructor)
+      return;
+
+    std::string Scope = std::string(Sym.Scope);
+    llvm::StringRef ScopeRef = Scope;
+    ScopeRef.consume_back("::");
+    LocatedSymbol Located;
+    Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
+    Located.PreferredDeclaration = DeclLoc;
+    Located.Definition = DefLoc;
+
+    SymbolQualitySignals Quality;
+    Quality.merge(Sym);
+    SymbolRelevanceSignals Relevance;
+    Relevance.Name = Sym.Name;
+    Relevance.Query = SymbolRelevanceSignals::Generic;
+    if (auto NameMatch = Filter.match(Sym.Name))
+      Relevance.NameMatch = *NameMatch;
+    else
+      return;
+    Relevance.merge(Sym);
+    auto Score =
+        evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
+    dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope,
+         Sym.Name, Score, Quality, Relevance);
+
+    Top.push({Score, std::move(Located)});
+  });
+  std::vector<LocatedSymbol> Result;
+  for (auto &Res : std::move(Top).items())
+    Result.push_back(std::move(Res.second));
+  // Assume we don't have results from the current file, otherwise the
+  // findNearbyIdentifier() mechanism would have handled them.
+  // If we have more than 3 results, and none from the current file, don't
+  // return anything, as confidence is too low.
+  // FIXME: Alternatively, try a stricter query?
+  if (Result.size() > 3)
+    return {};
+  return Result;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -457,7 +599,7 @@
       return ASTResults;
   }
 
-  return {};
+  return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
 }
 
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST) {
Index: clang-tools-extra/clangd/FindSymbols.h
===================================================================
--- clang-tools-extra/clangd/FindSymbols.h
+++ clang-tools-extra/clangd/FindSymbols.h
@@ -21,6 +21,10 @@
 class ParsedAST;
 class SymbolIndex;
 
+/// Helper function for deriving an LSP Location from a SymbolLocation.
+llvm::Expected<Location> symbolLocationToLocation(const SymbolLocation &Loc,
+                                                  llvm::StringRef HintPath);
+
 /// Helper function for deriving an LSP Location for a Symbol.
 llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
                                           llvm::StringRef HintPath);
Index: clang-tools-extra/clangd/FindSymbols.cpp
===================================================================
--- clang-tools-extra/clangd/FindSymbols.cpp
+++ clang-tools-extra/clangd/FindSymbols.cpp
@@ -18,6 +18,7 @@
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
@@ -39,15 +40,13 @@
 
 } // namespace
 
-llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
-                                          llvm::StringRef HintPath) {
-  // Prefer the definition over e.g. a function declaration in a header
-  auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration;
-  auto Path = URI::resolve(CD.FileURI, HintPath);
+llvm::Expected<Location> symbolLocationToLocation(const SymbolLocation &Loc,
+                                                  llvm::StringRef HintPath) {
+  auto Path = URI::resolve(Loc.FileURI, HintPath);
   if (!Path) {
     return llvm::make_error<llvm::StringError>(
-        formatv("Could not resolve path for symbol '{0}': {1}",
-                Sym.Name, llvm::toString(Path.takeError())),
+        llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI,
+                      llvm::toString(Path.takeError())),
         llvm::inconvertibleErrorCode());
   }
   Location L;
@@ -55,14 +54,21 @@
   // request.
   L.uri = URIForFile::canonicalize(*Path, HintPath);
   Position Start, End;
-  Start.line = CD.Start.line();
-  Start.character = CD.Start.column();
-  End.line = CD.End.line();
-  End.character = CD.End.column();
+  Start.line = Loc.Start.line();
+  Start.character = Loc.Start.column();
+  End.line = Loc.End.line();
+  End.character = Loc.End.column();
   L.range = {Start, End};
   return L;
 }
 
+llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
+                                          llvm::StringRef HintPath) {
+  // Prefer the definition over e.g. a function declaration in a header
+  return symbolLocationToLocation(
+      Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, HintPath);
+}
+
 llvm::Expected<std::vector<SymbolInformation>>
 getWorkspaceSymbols(llvm::StringRef Query, int Limit,
                     const SymbolIndex *const Index, llvm::StringRef HintPath) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to