sammccall created this revision.
sammccall added a reviewer: kadircet.
Herald added subscribers: cfe-commits, arphaman, mgrang, jkorous, MaskRay, 
ilya-biryukov.
Herald added a project: clang.

- nodes can have special-cased hit ranges including "holes" (FunctionTypeLoc in 
void foo())
- token conflicts between siblings (int a,b;) are resolved in favor of left 
sibling
- parent/child overlap is handled statefully rather than explicitly by 
comparing parent/child ranges (this lets us share a mechanism with sibling 
conflicts)


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D63760

Files:
  clangd/Selection.cpp
  clangd/Selection.h
  clangd/unittests/SelectionTests.cpp

Index: clangd/unittests/SelectionTests.cpp
===================================================================
--- clangd/unittests/SelectionTests.cpp
+++ clangd/unittests/SelectionTests.cpp
@@ -144,9 +144,9 @@
           R"cpp(
             void foo();
             #define CALL_FUNCTION(X) X()
-            void bar() { CALL_FUNC^TION([[fo^o]]); }
+            void bar() [[{ CALL_FUNC^TION(fo^o); }]]
           )cpp",
-          "DeclRefExpr",
+          "CompoundStmt",
       },
       {
           R"cpp(
@@ -172,7 +172,20 @@
       {"void foo() { [[foo^()]]; }", "CallExpr"},
       {"void foo() { [[foo^]] (); }", "DeclRefExpr"},
       {"int bar; void foo() [[{ foo (); }]]^", "CompoundStmt"},
+
+      // Tricky case: FunctionTypeLoc in FunctionDecl has a hole in it.
       {"[[^void]] foo();", "TypeLoc"},
+      {"[[void foo^()]];", "TypeLoc"},
+      {"[[^void foo^()]];", "FunctionDecl"},
+      {"[[void ^foo()]];", "FunctionDecl"},
+      // Tricky case: two VarDecls share a specifier.
+      {"[[int ^a]], b;", "VarDecl"},
+      {"[[int a, ^b]];", "VarDecl"},
+      // Tricky case: anonymous struct is a sibling of the VarDecl.
+      {"[[st^ruct {int x;}]] y;", "CXXRecordDecl"},
+      {"[[struct {int x;} ^y]];", "VarDecl"},
+      {"struct {[[int ^x]];} y;", "FieldDecl"},
+
       {"^", nullptr},
       {"void foo() { [[foo^^]] (); }", "DeclRefExpr"},
 
Index: clangd/Selection.h
===================================================================
--- clangd/Selection.h
+++ clangd/Selection.h
@@ -89,6 +89,15 @@
     Node *Parent;
     // Direct children within the selection tree.
     llvm::SmallVector<const Node *, 8> Children;
+    // Code ranges covered by this node. Usually {ASTNode.getSourceRange()}.
+    // Some nodes have "split" ranges, e.g.
+    //      void SomeFunction(int);
+    //      ~~~~             ~~~~~  <== FunctionTypeLoc is split.
+    // Or the range is narrrower than ASTNode.getSourceRange()
+    //      return Foo(42);
+    //                ~~~~          <== CXXConstructExpr excludes 'Foo'.
+    // Ranges are sorted and valid.
+    llvm::SmallVector<SourceRange, 1> Range;
     // The corresponding node from the full AST.
     ast_type_traits::DynTypedNode ASTNode;
     // The extent to which this node is covered by the selection.
Index: clangd/Selection.cpp
===================================================================
--- clangd/Selection.cpp
+++ clangd/Selection.cpp
@@ -8,13 +8,57 @@
 
 #include "Selection.h"
 #include "ClangdUnit.h"
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/TypeLoc.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
 
 namespace clang {
 namespace clangd {
 namespace {
 using Node = SelectionTree::Node;
 using ast_type_traits::DynTypedNode;
+using OffsetRange = std::pair<unsigned, unsigned>;
+
+// Stores a collection of (possibly-overlapping) OffsetRanges.
+// When new ranges are added, hit-tests them against existing ones.
+class RangeSet {
+public:
+  // Returns true if any of NewRanges are new to the set.
+  bool add(SmallVector<OffsetRange, 1> NewRanges) {
+    assert(std::is_sorted(Ranges.begin(), Ranges.end()));
+    assert(std::is_sorted(NewRanges.begin(), NewRanges.end()));
+    if (NewRanges.empty())
+      return false;
+
+    // Check if each new range is covered by existing ranges.
+    // We can do this in one pass, as both lists are sorted.
+    auto OldIt = Ranges.begin(), OldEnd = Ranges.end();
+    auto IsCovered = [&](const OffsetRange &New) -> bool {
+      unsigned Pos = New.first;
+      // Loop until this range is covered or we run out of children.
+      for (; Pos < New.second && OldIt != OldEnd; ++OldIt) {
+        if (OldIt->first > Pos)
+          return false; // [Pos, ChildIt->first) is not covered.
+        if (Pos < OldIt->second)
+          Pos = OldIt->second; // Erase *ChildIt from MyRange.
+      }
+      return Pos >= New.second;
+    };
+    NewRanges.erase(llvm::remove_if(NewRanges, IsCovered), NewRanges.end());
+    if (NewRanges.empty())
+      return false;
+
+    Ranges.insert(Ranges.end(), NewRanges.begin(), NewRanges.end());
+    llvm::sort(Ranges); // should we merge, too?
+    return true;
+  }
+
+private:
+  std::vector<OffsetRange> Ranges; // Always sorted.
+};
 
 // We find the selection by visiting written nodes in the AST, looking for nodes
 // that intersect with the selected character range.
@@ -51,7 +95,7 @@
   //  - those that can't be stored in DynTypedNode.
   // We're missing some interesting things like Attr due to the latter.
   bool TraverseDecl(Decl *X) {
-    if (X && isa<TranslationUnitDecl>(X))
+    if (X && llvm::isa<TranslationUnitDecl>(X))
       return Base::TraverseDecl(X); // Already pushed by constructor.
     // Base::TraverseDecl will suppress children, but not this node itself.
     if (X && X->isImplicit())
@@ -71,9 +115,12 @@
   }
   // Stmt is the same, but this form allows the data recursion optimization.
   bool dataTraverseStmtPre(Stmt *X) {
-    if (!X || canSafelySkipNode(X->getSourceRange()))
+    if (X == nullptr)
       return false;
-    push(DynTypedNode::create(*X));
+    auto Bounds = computeRanges(X);
+    if (canSafelySkipNode(Bounds))
+      return false;
+    push(DynTypedNode::create(*X), std::move(Bounds));
     return true;
   }
   bool dataTraverseStmtPost(Stmt *X) {
@@ -86,6 +133,9 @@
 
 private:
   using Base = RecursiveASTVisitor<SelectionVisitor>;
+  using Ranges = SmallVector<SourceRange, 1>;
+  using OffsetRange = std::pair<unsigned, unsigned>;
+
   SelectionVisitor(ASTContext &AST, unsigned SelBegin, unsigned SelEnd,
                    FileID SelFile)
       : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
@@ -104,30 +154,96 @@
   // Node is always a pointer so the generic code can handle any null checks.
   template <typename T, typename Func>
   bool traverseNode(T *Node, const Func &Body) {
-    if (Node == nullptr || canSafelySkipNode(Node->getSourceRange()))
+    if (Node == nullptr)
+      return true;
+    auto Bounds = computeRanges(Node);
+    if (canSafelySkipNode(Bounds))
       return true;
-    push(DynTypedNode::create(*Node));
+    push(DynTypedNode::create(*Node), std::move(Bounds));
     bool Ret = Body();
     pop();
     return Ret;
   }
 
+  // HIT TESTING
+  //
+  // We do rough hit testing on the way down the tree to avoid traversing
+  // subtrees that don't touch the selection (canSafelySkipNode), but
+  // fine-grained hit-testing is done on the way back up (in pop()).
+  // This means children get to claim parts of the selection first, and parents
+  // are only selected if they own tokens that no child owned.
+  //
+  // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
+  // parent's, and a node (transitively) owns all tokens in its range.
+  //
+  // Exception 1: child range claims tokens that should be owned by the parent.
+  //              e.g. in `void foo(int);`, the FunctionTypeLoc should own
+  //              `void (int)` but the parent FunctionDecl should own `foo`.
+  // To handle this case, computeRanges() special cases these nodes, and can
+  // return a set of discontiguous ranges.
+  //
+  // Exception 2: siblings both claim the same node.
+  //              e.g. `int x, y;` produces two sibling VarDecls.
+  //                    ~~~~~ x
+  //                    ~~~~~~~~ y
+  // Here the first ("leftmost") sibling claims the tokens it wants, and the
+  // other sibling gets what's left. So selecting "int" only includes the left
+  // VarDecl in the selection tree.
+
+  template <typename T> Ranges computeRanges(T *Node) const {
+    SourceRange Result = Node->getSourceRange();
+    if (Result.isInvalid())
+      return {};
+    return {Result};
+  }
+
+  Ranges computeRanges(TypeLoc *Node) const {
+    // In `void foo()`, FunctionTypeLoc should claim `void ()`, not `foo`.
+    if (auto FTL = Node->getAs<FunctionTypeLoc>()) {
+      TypeLoc Return = FTL.getReturnLoc();
+      Ranges R = computeRanges(&Return);
+      R.push_back(FTL.getLocalSourceRange());
+      return R;
+    }
+    return computeRanges<TypeLoc>(Node);
+  }
+
+  Ranges computeRanges(Stmt *Node) const {
+    // CXXConstructExpression's getSourceRange is pretty bizarre. It sometimes
+    // wants to capture the type, and sometimes the variable name.
+    // And it's often a sibling to the TypeLoc or child of a VarDecl, so this
+    // isn't what we want.
+    // We consider it to include the arg list only (including parens).
+    if (auto *CCE = dyn_cast<CXXConstructExpr>(Node)) {
+      if (CCE->getParenOrBraceRange().isValid())
+        return {CCE->getParenOrBraceRange()};
+      else if (CCE->getNumArgs() == 1)
+        return computeRanges(CCE->getArg(0));
+      else
+        return {};
+    }
+    return computeRanges<Stmt>(Node);
+  }
+
   // An optimization for a common case: nodes outside macro expansions that
   // don't intersect the selection may be recursively skipped.
-  bool canSafelySkipNode(SourceRange S) {
-    auto B = SM.getDecomposedLoc(S.getBegin());
-    auto E = SM.getDecomposedLoc(S.getEnd());
+  bool canSafelySkipNode(const SmallVector<SourceRange, 1> &Range) {
+    if (Range.empty())
+      return true;
+    auto B = SM.getDecomposedLoc(Range.front().getBegin());
+    auto E = SM.getDecomposedLoc(Range.back().getEnd());
     if (B.first != SelFile || E.first != SelFile)
       return false;
     return B.second >= SelEnd || E.second < SelBeginTokenStart;
   }
 
   // Pushes a node onto the ancestor stack. Pairs with pop().
-  void push(DynTypedNode Node) {
+  void push(DynTypedNode Node, SmallVector<SourceRange, 1> Range) {
     Nodes.emplace_back();
     Nodes.back().ASTNode = std::move(Node);
     Nodes.back().Parent = Stack.top();
     Nodes.back().Selected = SelectionTree::Unselected;
+    Nodes.back().Range = std::move(Range);
     Stack.push(&Nodes.back());
   }
 
@@ -150,80 +266,62 @@
   // This runs for every node in the AST, and must be fast in common cases.
   // This is called from pop(), so we can take children into account.
   SelectionTree::Selection computeSelection(const Node &N) {
-    SourceRange S = N.ASTNode.getSourceRange();
-    if (!S.isValid())
+    if (N.Range.empty())
       return SelectionTree::Unselected;
+    SmallVector<OffsetRange, 1> Ranges;
     // getTopMacroCallerLoc() allows selection of constructs in macro args. e.g:
     //   #define LOOP_FOREVER(Body) for(;;) { Body }
     //   void IncrementLots(int &x) {
     //     LOOP_FOREVER( ++x; )
     //   }
     // Selecting "++x" or "x" will do the right thing.
-    auto B = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getBegin()));
-    auto E = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getEnd()));
-    // Otherwise, nodes in macro expansions can't be selected.
-    if (B.first != SelFile || E.first != SelFile)
-      return SelectionTree::Unselected;
+    for (const SourceRange &S : N.Range) {
+      auto B = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getBegin()));
+      auto E = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getEnd()));
+      // Otherwise, nodes in macro expansions can't be selected.
+      if (B.first != SelFile || E.first != SelFile)
+        continue;
+      Ranges.push_back({B.second, E.second});
+    }
     // Cheap test: is there any overlap at all between the selection and range?
     // Note that E.second is the *start* of the last token, which is why we
     // compare against the "rounded-down" SelBegin.
-    if (B.second >= SelEnd || E.second < SelBeginTokenStart)
+    if (Ranges.empty() || Ranges.front().first >= SelEnd ||
+        Ranges.back().second < SelBeginTokenStart)
       return SelectionTree::Unselected;
 
-    // We hit something, need some more precise checks.
+    // We may have hit something, need some more precise checks.
     // Adjust [B, E) to be a half-open character range.
-    E.second += Lexer::MeasureTokenLength(S.getEnd(), SM, LangOpts);
+    for (auto &Range : Ranges)
+      Range.second += Lexer::MeasureTokenLength(
+          SM.getComposedLoc(SelFile, Range.second), SM, LangOpts);
+    auto PreciseBounds =
+        std::make_pair(Ranges.front().first, Ranges.back().second);
+    // Trim each range using the selection, drop ranges that are empty.
+    Ranges.erase(llvm::remove_if(Ranges,
+                                 [&](OffsetRange &Range) {
+                                   Range.first =
+                                       std::max(Range.first, SelBegin);
+                                   Range.second =
+                                       std::min(Range.second, SelEnd);
+                                   return Range.second <= Range.first;
+                                 }),
+                 Ranges.end());
     // This node's own selected text is (this range ^ selection) - child ranges.
     // If that's empty, then we've only collided with children.
-    if (nodesCoverRange(N.Children, std::max(SelBegin, B.second),
-                        std::min(SelEnd, E.second)))
+    if (!Claimed.add(std::move(Ranges)))
       return SelectionTree::Unselected; // Hit children only.
     // Some of our own characters are covered, this is a true hit.
-    return (B.second >= SelBegin && E.second <= SelEnd)
+    // (Ranges is nonempty, otherwise nodesCoverRanges was true).
+    return (PreciseBounds.first >= SelBegin && PreciseBounds.second <= SelEnd)
                ? SelectionTree::Complete
                : SelectionTree::Partial;
   }
 
-  // Is the range [Begin, End) entirely covered by the union of the Nodes?
-  // (The range is a parent node's extent, and the covering nodes are children).
-  bool nodesCoverRange(llvm::ArrayRef<const Node *> Nodes, unsigned Begin,
-                       unsigned End) {
-    if (Begin >= End)
-      return true;
-    if (Nodes.empty())
-      return false;
-
-    // Collect all the expansion ranges, as offsets.
-    SmallVector<std::pair<unsigned, unsigned>, 8> ChildRanges;
-    for (const Node *N : Nodes) {
-      CharSourceRange R = SM.getExpansionRange(N->ASTNode.getSourceRange());
-      auto B = SM.getDecomposedLoc(R.getBegin());
-      auto E = SM.getDecomposedLoc(R.getEnd());
-      if (B.first != SelFile || E.first != SelFile)
-        continue;
-      // Try to cover up to the next token, spaces between children don't count.
-      if (auto Tok = Lexer::findNextToken(R.getEnd(), SM, LangOpts))
-        E.second = SM.getFileOffset(Tok->getLocation());
-      else if (R.isTokenRange())
-        E.second += Lexer::MeasureTokenLength(R.getEnd(), SM, LangOpts);
-      ChildRanges.push_back({B.second, E.second});
-    }
-    llvm::sort(ChildRanges);
-
-    // Scan through the child ranges, removing as we go.
-    for (const auto R : ChildRanges) {
-      if (R.first > Begin)
-        return false;   // [Begin, R.first) is not covered.
-      Begin = R.second; // Eliminate [R.first, R.second).
-      if (Begin >= End)
-        return true; // Remaining range is empty.
-    }
-    return false; // Went through all children, trailing characters remain.
-  }
-
   SourceManager &SM;
   const LangOptions &LangOpts;
   std::stack<Node *> Stack;
+  RangeSet Claimed;
   std::deque<Node> Nodes; // Stable pointers as we add more nodes.
   // Half-open selection range.
   unsigned SelBegin;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to