sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous, 
MaskRay.
Herald added a project: clang.

The exclusive-claim model is successful at resolving conflicts over tokens
between parent/child or siblings. However it's not the right model for selecting
AST nodes produced by a macro expansion, which can produce several independent
nodes that are equally associated with the macro invocation.
Additionally, any model that only uses the endpoints in a range can fail when
a macro invocation occurs inside the node.

To address this, we use the existing TokenBuffer in more depth. SourceRanges
are translated into an array of expanded tokens we can iterate over, and in
principle process token by token (in practice, batching related tokens helps).

For regular tokens (and macro-arg expansions) we claim the tokens as before,
but for tokens from macro bodies we merely check whether the macro name was
selected. Thus tokens in macro bodies are selected by selecting the macro name.

The aggregation of Selection is now more principled as we need to be able to
call claim()/peek() an arbitrary number of times.

One side-effect of iterating over the expanded tokens is that (usually) nothing
claims preprocessor tokens like macro names and directives.
Rather than fixing this I just left them unclaimed, and removed support for
determining the selectedness of TUDecl.
(That was originally implemented in 90a5bf92ff97b1, but doesn't seem to be very
important or worth the complexity any longer).

The expandedTokens(SourceLocation) helper could be added locally, but seems to
make sense on TokenBuffer.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70512

Files:
  clang-tools-extra/clangd/Selection.cpp
  clang-tools-extra/clangd/Selection.h
  clang-tools-extra/clangd/unittests/SelectionTests.cpp
  clang-tools-extra/clangd/unittests/TweakTests.cpp
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp

Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Annotations.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
+#include "gmock/gmock.h"
 #include <cassert>
 #include <cstdlib>
 #include <gmock/gmock.h>
@@ -663,6 +664,20 @@
               ValueIs(SameRange(findSpelled("not_mapped"))));
 }
 
+TEST_F(TokenBufferTest, ExpandedTokensForRange) {
+  recordTokens(R"cpp(
+    #define SIGN(X) X##_washere
+    A SIGN(B) C SIGN(D) E SIGN(F) G
+  )cpp");
+
+  SourceRange R(findExpanded("C").front().location(),
+                findExpanded("F_washere").front().location());
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(Buffer.expandedTokens(R),
+              SameRange(findExpanded("C D_washere E F_washere")));
+  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
+}
+
 TEST_F(TokenBufferTest, ExpansionStartingAt) {
   // Object-like macro expansions.
   recordTokens(R"cpp(
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -119,6 +119,22 @@
   return Text.substr(Begin, length());
 }
 
+llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
+  if (R.isInvalid())
+    return {};
+  const Token *Begin =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
+      });
+  const Token *End =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
+      });
+  if (Begin > End)
+    return {};
+  return {Begin, End};
+}
+
 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
 TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   assert(Expanded);
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -182,6 +182,10 @@
     return ExpandedTokens;
   }
 
+  /// Returns the subrange of expandedTokens() corresponding to the closed
+  /// token range R.
+  llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
+
   /// Find the subrange of spelled tokens that produced the corresponding \p
   /// Expanded tokens.
   ///
Index: clang-tools-extra/clangd/unittests/TweakTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -269,7 +269,7 @@
   EXPECT_UNAVAILABLE(UnavailableCases);
 
   // vector of pairs of input and output strings
-  const std::vector<std::pair<llvm::StringLiteral, llvm::StringLiteral>>
+  const std::vector<std::pair<std::string, std::string>>
       InputOutputs = {
           // extraction from variable declaration/assignment
           {R"cpp(void varDecl() {
@@ -321,17 +321,10 @@
                    if(1)
                     LOOP(5 + [[3]])
                  })cpp",
-           /*FIXME: It should be extracted like this. SelectionTree needs to be
-             * fixed for macros.
             R"cpp(#define LOOP(x) while (1) {a = x;}
-                void f(int a) {
-                  auto dummy = 3; if(1)
-                   LOOP(5 + dummy)
-                })cpp"},*/
-           R"cpp(#define LOOP(x) while (1) {a = x;}
                  void f(int a) {
-                   auto dummy = LOOP(5 + 3); if(1)
-                    dummy
+                   auto dummy = 3; if(1)
+                    LOOP(5 + dummy)
                  })cpp"},
           {R"cpp(#define LOOP(x) do {x;} while(1);
                  void f(int a) {
@@ -644,13 +637,18 @@
   )cpp";
   EXPECT_EQ(apply(TemplateFailInput), "unavailable");
 
-  // FIXME: This should be extractable after selectionTree works correctly for
-  // macros (currently it doesn't select anything for the following case)
-  std::string MacroFailInput = R"cpp(
+  std::string MacroInput = R"cpp(
     #define F(BODY) void f() { BODY }
     F ([[int x = 0;]])
   )cpp";
-  EXPECT_EQ(apply(MacroFailInput), "unavailable");
+  std::string MacroOutput = R"cpp(
+    #define F(BODY) void f() { BODY }
+    void extracted() {
+int x = 0;
+}
+F (extracted();)
+  )cpp";
+  EXPECT_EQ(apply(MacroInput), MacroOutput);
 
   // Shouldn't crash.
   EXPECT_EQ(apply("void f([[int a]]);"), "unavailable");
Index: clang-tools-extra/clangd/unittests/SelectionTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -134,6 +134,15 @@
           )cpp",
           "IfStmt",
       },
+      {
+          R"cpp(
+            int x(int);
+            #define M(foo) x(foo)
+            int a = 42;
+            int b = M([[^a]]);
+          )cpp",
+          "DeclRefExpr",
+      },
       {
           R"cpp(
             void foo();
@@ -386,10 +395,10 @@
           void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {}
       )cpp",
       R"cpp(int a = [[5 >^> 1]];)cpp",
-      R"cpp([[
+      R"cpp(
         #define ECHO(X) X
-        ECHO(EC^HO([[$C[[int]]) EC^HO(a]]));
-      ]])cpp",
+        ECHO(EC^HO($C[[int]]) EC^HO(a));
+      )cpp",
       R"cpp( $C[[^$C[[int]] a^]]; )cpp",
       R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp",
   };
Index: clang-tools-extra/clangd/Selection.h
===================================================================
--- clang-tools-extra/clangd/Selection.h
+++ clang-tools-extra/clangd/Selection.h
@@ -76,7 +76,7 @@
                 unsigned Start, unsigned End);
 
   // Describes to what extent an AST node is covered by the selection.
-  enum Selection {
+  enum Selection : unsigned char {
     // The AST node owns no characters covered by the selection.
     // Note that characters owned by children don't count:
     //   if (x == 0) scream();
Index: clang-tools-extra/clangd/Selection.cpp
===================================================================
--- clang-tools-extra/clangd/Selection.cpp
+++ clang-tools-extra/clangd/Selection.cpp
@@ -34,6 +34,22 @@
 using Node = SelectionTree::Node;
 using ast_type_traits::DynTypedNode;
 
+// Sentinel value for the selectedness of a node where we've seen no tokens yet.
+// Once we see a token, treated as fully-selected (so far). If we never see a
+// token, it folds into Unselected. These are never exposed publicly,
+constexpr SelectionTree::Selection NoTokens =
+    static_cast<SelectionTree::Selection>(
+        static_cast<unsigned char>(SelectionTree::Complete + 1));
+
+// Nodes start start with NoTokens, and then use this function to aggregate
+// the selectedness as more tokens are found.
+void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
+  if (Result == NoTokens)
+    Result = New;
+  else if (Result != New)
+    Result = SelectionTree::Partial;
+}
+
 // Identifies which tokens are selected, and evaluates claims of source ranges
 // by AST nodes. Tokens may be claimed only once: first-come, first-served.
 class SelectedTokens {
@@ -102,13 +118,20 @@
       }
     }
 
-    // If some tokens were previously claimed (Result != Unselected), we may
-    // upgrade from Partial->Complete, even if no new tokens were claimed.
-    // Important for [[int a]].
-    if (ClaimedAnyToken || Result) {
-      Result = std::max(Result, PartialSelection ? SelectionTree::Partial
-                                                 : SelectionTree::Complete);
-    }
+    if (ClaimedAnyToken)
+      update(Result, PartialSelection ? SelectionTree::Partial
+                                      : SelectionTree::Complete);
+  }
+
+  // Checks whether the token at Offset is selected, and updates Result.
+  // Does not claim the token.
+  void peek(unsigned Offset, SelectionTree::Selection &Result) const {
+    // Find the token, if it exists.
+    auto I = llvm::partition_point(
+        Tokens, [&](const TokInfo &Tok) { return Tok.EndOffset <= Offset; });
+    if (I == Tokens.end() || I->StartOffset != Offset)
+      return;
+    update(Result, I->Selected);
   }
 
 private:
@@ -195,16 +218,6 @@
     V.TraverseAST(AST);
     assert(V.Stack.size() == 1 && "Unpaired push/pop?");
     assert(V.Stack.top() == &V.Nodes.front());
-    // We selected TUDecl if tokens were unclaimed (or the file is empty).
-    SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected;
-    V.Claimed.claim(Begin, End, UnclaimedTokens);
-    if (UnclaimedTokens || V.Nodes.size() == 1) {
-      StringRef FileContent = AST.getSourceManager().getBufferData(File);
-      // Don't require the trailing newlines to be selected.
-      bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size();
-      V.Stack.top()->Selected =
-          SelectedAll ? SelectionTree::Complete : SelectionTree::Partial;
-    }
     return std::move(V.Nodes);
   }
 
@@ -289,6 +302,7 @@
 #ifndef NDEBUG
         PrintPolicy(PP),
 #endif
+        Tokens(Tokens),
         Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd),
         SelFile(SelFile),
         SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken(
@@ -377,11 +391,9 @@
     Nodes.emplace_back();
     Nodes.back().ASTNode = std::move(Node);
     Nodes.back().Parent = Stack.top();
+    Nodes.back().Selected = NoTokens;
     Stack.push(&Nodes.back());
     claimRange(Early, Nodes.back().Selected);
-    // Early hit detection never selects the whole node.
-    if (Nodes.back().Selected)
-      Nodes.back().Selected = SelectionTree::Partial;
   }
 
   // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
@@ -390,6 +402,8 @@
     Node &N = *Stack.top();
     dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
     claimRange(N.ASTNode.getSourceRange(), N.Selected);
+    if (N.Selected == NoTokens)
+      N.Selected = SelectionTree::Unselected;
     if (N.Selected || !N.Children.empty()) {
       // Attach to the tree.
       N.Parent->Children.push_back(&N);
@@ -426,29 +440,67 @@
   void claimRange(SourceRange S, SelectionTree::Selection &Result) {
     if (!S.isValid())
       return;
-    // toHalfOpenFileRange() allows selection of constructs in macro args. e.g:
-    //   #define LOOP_FOREVER(Body) for(;;) { Body }
-    //   void IncrementLots(int &x) {
-    //     LOOP_FOREVER( ++x; )
-    //   }
-    // Selecting "++x" or "x" will do the right thing.
-    auto Range = toHalfOpenFileRange(SM, LangOpts, S);
-    assert(Range && "We should be able to get the File Range");
-    dlog("{1}claimRange: {0}", Range->printToString(SM), indent());
-    auto B = SM.getDecomposedLoc(Range->getBegin());
-    auto E = SM.getDecomposedLoc(Range->getEnd());
-    // Otherwise, nodes in macro expansions can't be selected.
-    if (B.first != SelFile || E.first != SelFile)
-      return;
-    // Attempt to claim the remaining range. If there's nothing to claim, only
-    // children were selected.
-    Claimed.claim(B.second, E.second, Result);
+
+    // We need to iterate over all the expanded tokens that are part of S.
+    // Consider the macro expansion FLAG(x) -> int x = 0;
+    // Neither S.getBegin() nor S.getEnd() are arg expansions, but x is.
+    // The selection FLAG([[x]]) must partially select the VarDecl.
+    llvm::ArrayRef<syntax::Token> Remaining = Tokens.expandedTokens(S);
+    while (!Remaining.empty()) {
+      // Take consecutive tokens from the same context together for efficiency.
+      FileID FID = SM.getFileID(Remaining.front().location());
+      auto Batch = Remaining.take_while([&](const syntax::Token &T) {
+        return SM.getFileID(T.location()) == FID;
+      });
+      assert(!Batch.empty());
+      Remaining = Remaining.drop_front(Batch.size());
+
+      // There are several possible categories of FileID depending on how the
+      // preprocessor was used to generate these tokens:
+      //   main file, #included file, macro args, macro bodies.
+      // We need to identify the main-file tokens that represent Batch, and
+      // determine whether we want to exclusively claim them. Regular tokens
+      // represent one AST construct, but a macro invocation can represent many.
+      if (FID == SelFile) {
+        // Tokens written directly in the main file.
+        // Claim the token exclusively for this node.
+        Claimed.claim(SM.getFileOffset(Batch.front().location()),
+                      SM.getFileOffset(Batch.back().location()) +
+                          Batch.back().length(),
+                      Result);
+      } else if (Batch.front().location().isFileID()) {
+        // Tokens in another file #included into the main file.
+        // Check if the #include is selected, but don't claim it exclusively.
+        for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
+             Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
+          if (SM.getFileID(Loc) == SelFile) {
+            Claimed.peek(SM.getFileOffset(Loc), Result);
+            break;
+          }
+        }
+      } else {
+        SourceLocation ArgStart =
+            SM.getTopMacroCallerLoc(Batch.front().location());
+        if (SM.getFileID(ArgStart) == SelFile) {
+          // Tokens that were passed as a macro argument.
+          // Claim the token exclusively for this node.
+          // FIXME: this prevents selecting both occurrences of args used twice.
+          SourceLocation ArgEnd =
+              SM.getTopMacroCallerLoc(Batch.back().location());
+          Claimed.claim(SM.getFileOffset(ArgStart),
+                        SM.getFileOffset(ArgEnd) + Batch.back().length(),
+                        Result);
+        } else {
+          // A non-argument macro expansion.
+          // Check if the macro name is selected, don't claim it exclusively.
+          auto Expansion = SM.getDecomposedExpansionLoc(S.getBegin());
+          if (Expansion.first == SelFile)
+            Claimed.peek(Expansion.second, Result);
+        }
+      }
+    }
     if (Result)
-      dlog("{1}hit selection: {0}",
-           SourceRange(SM.getComposedLoc(B.first, B.second),
-                       SM.getComposedLoc(E.first, E.second))
-               .printToString(SM),
-           indent());
+      dlog("{1}hit selection: {0}", S.printToString(SM), indent());
   }
 
   std::string indent(int Offset = 0) {
@@ -464,6 +516,7 @@
   const PrintingPolicy &PrintPolicy;
 #endif
   std::stack<Node *> Stack;
+  const syntax::TokenBuffer &Tokens;
   SelectedTokens Claimed;
   std::deque<Node> Nodes; // Stable pointers as we add more nodes.
   FileID SelFile;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to