[PATCH] D75479: [clangd] go-to-def on names in comments etc that are used nearby.

Sam McCall via Phabricator via cfe-commits Thu, 16 Apr 2020 01:38:04 -0700

sammccall updated this revision to Diff 257989.
sammccall added a comment.

rebase - finally getting back to this



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D75479/new/

https://reviews.llvm.org/D75479

Files:
  clang-tools-extra/clangd/XRefs.cpp
  clang-tools-extra/clangd/XRefs.h
  clang-tools-extra/clangd/unittests/XRefsTests.cpp

Index: clang-tools-extra/clangd/unittests/XRefsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -985,6 +985,88 @@
       ElementsAre(Sym("foo", FooWithoutHeader.range())));
 }
 
+TEST(LocateSymbol, NearbyTokenSmoke) {
+  auto T = Annotations(R"cpp(
+    // prints e^rr and crashes
+    void die(const char* [[err]]);
+  )cpp");
+  auto AST = TestTU::withCode(T.code()).build();
+  // We don't pass an index, so can't hit index-based fallback.
+  EXPECT_THAT(locateSymbolAt(AST, T.point()),
+              ElementsAre(Sym("err", T.range())));
+}
+
+TEST(LocateSymbol, NearbyIdentifier) {
+  const char *Tests[] = {
+      R"cpp(
+      // regular identifiers (won't trigger)
+      int hello;
+      int y = he^llo;
+    )cpp",
+      R"cpp(
+      // disabled preprocessor sections
+      int [[hello]];
+      #if 0
+      int y = ^hello;
+      #endif
+    )cpp",
+      R"cpp(
+      // comments
+      // he^llo, world
+      int [[hello]];
+    )cpp",
+      R"cpp(
+      // string literals
+      int [[hello]];
+      const char* greeting = "h^ello, world";
+    )cpp",
+
+      R"cpp(
+      // can refer to macro invocations (even if they expand to nothing)
+      #define INT int
+      [[INT]] x;
+      // I^NT
+    )cpp",
+
+      R"cpp(
+      // prefer nearest occurrence
+      int hello;
+      int x = hello;
+      // h^ello
+      int y = [[hello]];
+      int z = hello;
+    )cpp",
+
+      R"cpp(
+      // short identifiers find near results
+      int [[hi]];
+      // h^i
+    )cpp",
+      R"cpp(
+      // short identifiers don't find far results
+      int hi;
+
+
+
+      // h^i
+    )cpp",
+  };
+  for (const char *Test : Tests) {
+    Annotations T(Test);
+    auto AST = TestTU::withCode(T.code()).build();
+    const auto &SM = AST.getSourceManager();
+    llvm::Optional<Range> Nearby;
+    if (const auto *Tok = findNearbyIdentifier(
+            cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens()))
+      Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange(
+                                       Tok->location(), Tok->endLocation()));
+    if (T.ranges().empty())
+      EXPECT_THAT(Nearby, Eq(llvm::None)) << Test;
+    else
+      EXPECT_THAT(Nearby, T.range()) << Test;
+  }
+}
+
 TEST(FindReferences, WithinAST) {
   const char *Tests[] = {
       R"cpp(// Local variable
Index: clang-tools-extra/clangd/XRefs.h
===================================================================
--- clang-tools-extra/clangd/XRefs.h
+++ clang-tools-extra/clangd/XRefs.h
@@ -26,6 +26,10 @@
 #include <vector>
 
 namespace clang {
+namespace syntax {
+class Token;
+class TokenBuffer;
+} // namespace syntax
 namespace clangd {
 class ParsedAST;
 
@@ -64,6 +68,13 @@
                              SourceLocation Loc,
                              const std::string &MainFilePath);
 
+// If SpellingLoc points at a "word" that does not correspond to an expanded
+// token (e.g. in a comment, a string, or a PP disabled region), then try to
+// find a close occurrence of that word that does.
+// (This is for internal use by locateSymbolAt, and is exposed for testing).
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+                                          const syntax::TokenBuffer &TB);
+
 /// Get all document links
 std::vector<DocumentLink> getDocumentLinks(ParsedAST &AST);
 
Index: clang-tools-extra/clangd/XRefs.cpp
===================================================================
--- clang-tools-extra/clangd/XRefs.cpp
+++ clang-tools-extra/clangd/XRefs.cpp
@@ -48,6 +48,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -315,6 +316,12 @@
   return Result;
 }
 
+bool tokenSpelledAt(SourceLocation SpellingLoc, const syntax::TokenBuffer &TB) {
+  auto ExpandedTokens = TB.expandedTokens(
+      TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc));
+  return !ExpandedTokens.empty();
+}
+
 llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) {
   unsigned B = Offset, E = Offset;
   while (B > 0 && isIdentifierBody(Code[B - 1]))
@@ -481,6 +488,84 @@
   return Results;
 }
 
+const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc,
+                                          const syntax::TokenBuffer &TB) {
+  const SourceManager &SM = TB.sourceManager();
+  auto Pos = SM.getDecomposedLoc(SpellingLoc);
+  llvm::StringRef Code = SM.getBufferData(Pos.first);
+  llvm::StringRef Word = wordTouching(Code, Pos.second);
+  if (Word.empty())
+    return nullptr;
+  unsigned WordOffset = Word.data() - Code.data();
+  SourceLocation WordStart = SM.getComposedLoc(Pos.first, WordOffset);
+  // If this is a real token that survived preprocessing, don't use heuristics.
+  auto WordExpandedTokens =
+      TB.expandedTokens(SM.getMacroArgExpandedLocation(WordStart));
+  if (!WordExpandedTokens.empty())
+    return nullptr;
+
+  // We prefer the closest possible token, line-wise. Backwards is penalized.
+  // Ties are implicitly broken by traversal order (first-one-wins).
+  unsigned WordLine = SM.getLineNumber(Pos.first, WordOffset);
+  auto Cost = [&](SourceLocation Loc) -> unsigned {
+    assert(SM.getFileID(Loc) == Pos.first && "spelled token in wrong file?");
+    unsigned Line = SM.getLineNumber(Pos.first, SM.getFileOffset(Loc));
+    if (Line > WordLine)
+      return 1 + llvm::Log2_64(Line - WordLine);
+    if (Line < WordLine)
+      return 2 + llvm::Log2_64(WordLine - Line);
+    return 0;
+  };
+  const syntax::Token *BestTok = nullptr;
+  // Search bounds are based on word length: 2^N lines forward.
+  unsigned BestCost = Word.size() + 1;
+
+  // Updates BestTok and BestCost if Tok is a good candidate.
+  // May return true if the cost is too high for this token.
+  auto Consider = [&](const syntax::Token &Tok) {
+    if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word))
+      return false;
+    // No point guessing the same location we started with.
+    if (Tok.location() == WordStart)
+      return false;
+    // We've done cheap checks, compute cost so we can break the caller's loop.
+    unsigned TokCost = Cost(Tok.location());
+    if (TokCost >= BestCost)
+      return true; // causes the outer loop to break.
+    // Allow locations that might be part of the AST, and macros (even if empty)
+    // but not things like disabled preprocessor sections.
+    if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
+      return false;
+    // We already verified this token is an improvement.
+    BestCost = TokCost;
+    BestTok = &Tok;
+    return false;
+  };
+  auto SpelledTokens = TB.spelledTokens(Pos.first);
+  // Find where the word occurred in the token stream, to search forward & back.
+  auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
+    assert(SM.getFileID(T.location()) == SM.getFileID(WordStart));
+    return T.location() >= WordStart; // Comparison OK: same file.
+  });
+  // Search for matches after the cursor.
+  for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
+    if (Consider(Tok))
+      break; // costs of later tokens are greater...
+  // Search for matches before the cursor.
+  for (const syntax::Token &Tok :
+       llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
+    if (Consider(Tok))
+      break;
+
+  if (BestTok)
+    vlog(
+        "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
+        Word, WordStart.printToString(SM),
+        BestTok->location().printToString(SM));
+
+  return BestTok;
+}
+
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -516,6 +601,18 @@
   if (!ASTResults.empty())
     return ASTResults;
 
+  // If the cursor doesn't refer directly to anything, try to find another
+  // occurrence of the same word that does.
+  if (const syntax::Token *NearbyIdent =
+          findNearbyIdentifier(*CurLoc, AST.getTokens())) {
+    if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath))
+      return {*std::move(Macro)};
+    ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
+                                   *MainFilePath, Index);
+    if (!ASTResults.empty())
+      return ASTResults;
+  }
+
   return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75479: [clangd] go-to-def on names in comments etc that are used nearby.

Reply via email to