sammccall updated this revision to Diff 230948.
sammccall marked 10 inline comments as done.
sammccall added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70512/new/

https://reviews.llvm.org/D70512

Files:
  clang-tools-extra/clangd/Selection.cpp
  clang-tools-extra/clangd/Selection.h
  clang-tools-extra/clangd/unittests/SelectionTests.cpp
  clang-tools-extra/clangd/unittests/TweakTests.cpp
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp

Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Annotations.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
+#include "gmock/gmock.h"
 #include <cassert>
 #include <cstdlib>
 #include <gmock/gmock.h>
@@ -663,6 +664,20 @@
               ValueIs(SameRange(findSpelled("not_mapped"))));
 }
 
+TEST_F(TokenBufferTest, ExpandedTokensForRange) {
+  recordTokens(R"cpp(
+    #define SIGN(X) X##_washere
+    A SIGN(B) C SIGN(D) E SIGN(F) G
+  )cpp");
+
+  SourceRange R(findExpanded("C").front().location(),
+                findExpanded("F_washere").front().location());
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(Buffer.expandedTokens(R),
+              SameRange(findExpanded("C D_washere E F_washere")));
+  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
+}
+
 TEST_F(TokenBufferTest, ExpansionStartingAt) {
   // Object-like macro expansions.
   recordTokens(R"cpp(
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -119,6 +119,22 @@
   return Text.substr(Begin, length());
 }
 
+llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
+  if (R.isInvalid())
+    return {};
+  const Token *Begin =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
+      });
+  const Token *End =
+      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
+        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
+      });
+  if (Begin > End)
+    return {};
+  return {Begin, End};
+}
+
 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
 TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   assert(Expanded);
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -175,6 +175,7 @@
   /// All tokens produced by the preprocessor after all macro replacements,
   /// directives, etc. Source locations found in the clang AST will always
   /// point to one of these tokens.
+  /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()).
   /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split
   ///        into two '>' tokens by the parser. However, TokenBuffer currently
   ///        keeps it as a single '>>' token.
@@ -182,6 +183,10 @@
     return ExpandedTokens;
   }
 
+  /// Returns the subrange of expandedTokens() corresponding to the closed
+  /// token range R.
+  llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
+
   /// Find the subrange of spelled tokens that produced the corresponding \p
   /// Expanded tokens.
   ///
Index: clang-tools-extra/clangd/unittests/TweakTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -269,7 +269,7 @@
   EXPECT_UNAVAILABLE(UnavailableCases);
 
   // vector of pairs of input and output strings
-  const std::vector<std::pair<llvm::StringLiteral, llvm::StringLiteral>>
+  const std::vector<std::pair<std::string, std::string>>
       InputOutputs = {
           // extraction from variable declaration/assignment
           {R"cpp(void varDecl() {
@@ -321,17 +321,10 @@
                    if(1)
                     LOOP(5 + [[3]])
                  })cpp",
-           /*FIXME: It should be extracted like this. SelectionTree needs to be
-             * fixed for macros.
             R"cpp(#define LOOP(x) while (1) {a = x;}
-                void f(int a) {
-                  auto dummy = 3; if(1)
-                   LOOP(5 + dummy)
-                })cpp"},*/
-           R"cpp(#define LOOP(x) while (1) {a = x;}
                  void f(int a) {
-                   auto dummy = LOOP(5 + 3); if(1)
-                    dummy
+                   auto dummy = 3; if(1)
+                    LOOP(5 + dummy)
                  })cpp"},
           {R"cpp(#define LOOP(x) do {x;} while(1);
                  void f(int a) {
@@ -644,13 +637,18 @@
   )cpp";
   EXPECT_EQ(apply(TemplateFailInput), "unavailable");
 
-  // FIXME: This should be extractable after selectionTree works correctly for
-  // macros (currently it doesn't select anything for the following case)
-  std::string MacroFailInput = R"cpp(
+  std::string MacroInput = R"cpp(
     #define F(BODY) void f() { BODY }
     F ([[int x = 0;]])
   )cpp";
-  EXPECT_EQ(apply(MacroFailInput), "unavailable");
+  std::string MacroOutput = R"cpp(
+    #define F(BODY) void f() { BODY }
+    void extracted() {
+int x = 0;
+}
+F (extracted();)
+  )cpp";
+  EXPECT_EQ(apply(MacroInput), MacroOutput);
 
   // Shouldn't crash.
   EXPECT_EQ(apply("void f([[int a]]);"), "unavailable");
Index: clang-tools-extra/clangd/unittests/SelectionTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -134,6 +134,15 @@
           )cpp",
           "IfStmt",
       },
+      {
+          R"cpp(
+            int x(int);
+            #define M(foo) x(foo)
+            int a = 42;
+            int b = M([[^a]]);
+          )cpp",
+          "DeclRefExpr",
+      },
       {
           R"cpp(
             void foo();
@@ -386,10 +395,10 @@
           void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {}
       )cpp",
       R"cpp(int a = [[5 >^> 1]];)cpp",
-      R"cpp([[
+      R"cpp(
         #define ECHO(X) X
-        ECHO(EC^HO([[$C[[int]]) EC^HO(a]]));
-      ]])cpp",
+        ECHO(EC^HO($C[[int]]) EC^HO(a));
+      )cpp",
       R"cpp( $C[[^$C[[int]] a^]]; )cpp",
       R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp",
   };
@@ -426,6 +435,20 @@
 
   EXPECT_EQ("BreakStmt", T.commonAncestor()->kind());
   EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind());
+
+  Case = R"cpp(
+#define MACRO while(1)
+    void test() {
+[[#include "Expand.inc"]]
+        br^eak;
+    }
+  )cpp";
+  Test = Annotations(Case);
+  TU = TestTU::withCode(Test.code());
+  AST = TU.build();
+  T = makeSelectionTree(Case, AST);
+
+  EXPECT_EQ("WhileStmt", T.commonAncestor()->kind());
 }
 
 TEST(SelectionTest, Implicit) {
Index: clang-tools-extra/clangd/Selection.h
===================================================================
--- clang-tools-extra/clangd/Selection.h
+++ clang-tools-extra/clangd/Selection.h
@@ -76,7 +76,7 @@
                 unsigned Start, unsigned End);
 
   // Describes to what extent an AST node is covered by the selection.
-  enum Selection {
+  enum Selection : unsigned char {
     // The AST node owns no characters covered by the selection.
     // Note that characters owned by children don't count:
     //   if (x == 0) scream();
Index: clang-tools-extra/clangd/Selection.cpp
===================================================================
--- clang-tools-extra/clangd/Selection.cpp
+++ clang-tools-extra/clangd/Selection.cpp
@@ -34,6 +34,24 @@
 using Node = SelectionTree::Node;
 using ast_type_traits::DynTypedNode;
 
+// Sentinel value for the selectedness of a node where we've seen no tokens yet.
+// Once we see a token, treated as fully-selected (so far). If we never see a
+// token, it folds into Unselected. These are never exposed publicly,
+constexpr SelectionTree::Selection NoTokens =
+    static_cast<SelectionTree::Selection>(
+        static_cast<unsigned char>(SelectionTree::Complete + 1));
+
+// Nodes start start with NoTokens, and then use this function to aggregate
+// the selectedness as more tokens are found.
+void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
+  assert(New != NoTokens);
+  if (Result == NoTokens)
+    Result = New;
+  else if (Result != New)
+    // Can only be completely selected (or unselected) if all tokens are.
+    Result = SelectionTree::Partial;
+}
+
 // Identifies which tokens are selected, and evaluates claims of source ranges
 // by AST nodes. Tokens may be claimed only once: first-come, first-served.
 class SelectedTokens {
@@ -102,13 +120,20 @@
       }
     }
 
-    // If some tokens were previously claimed (Result != Unselected), we may
-    // upgrade from Partial->Complete, even if no new tokens were claimed.
-    // Important for [[int a]].
-    if (ClaimedAnyToken || Result) {
-      Result = std::max(Result, PartialSelection ? SelectionTree::Partial
-                                                 : SelectionTree::Complete);
-    }
+    if (ClaimedAnyToken)
+      update(Result, PartialSelection ? SelectionTree::Partial
+                                      : SelectionTree::Complete);
+  }
+
+  // Checks whether the token at Offset is selected, and updates Result.
+  // Does not claim the token.
+  void peek(unsigned Offset, SelectionTree::Selection &Result) const {
+    // Find the token, if it exists.
+    auto I = llvm::partition_point(
+        Tokens, [&](const TokInfo &Tok) { return Tok.EndOffset <= Offset; });
+    if (I == Tokens.end() || I->StartOffset != Offset)
+      return;
+    update(Result, I->Selected);
   }
 
 private:
@@ -195,16 +220,6 @@
     V.TraverseAST(AST);
     assert(V.Stack.size() == 1 && "Unpaired push/pop?");
     assert(V.Stack.top() == &V.Nodes.front());
-    // We selected TUDecl if tokens were unclaimed (or the file is empty).
-    SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected;
-    V.Claimed.claim(Begin, End, UnclaimedTokens);
-    if (UnclaimedTokens || V.Nodes.size() == 1) {
-      StringRef FileContent = AST.getSourceManager().getBufferData(File);
-      // Don't require the trailing newlines to be selected.
-      bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size();
-      V.Stack.top()->Selected =
-          SelectedAll ? SelectionTree::Complete : SelectionTree::Partial;
-    }
     return std::move(V.Nodes);
   }
 
@@ -289,6 +304,7 @@
 #ifndef NDEBUG
         PrintPolicy(PP),
 #endif
+        Tokens(Tokens),
         Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd),
         SelFile(SelFile),
         SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken(
@@ -377,11 +393,9 @@
     Nodes.emplace_back();
     Nodes.back().ASTNode = std::move(Node);
     Nodes.back().Parent = Stack.top();
+    Nodes.back().Selected = NoTokens;
     Stack.push(&Nodes.back());
     claimRange(Early, Nodes.back().Selected);
-    // Early hit detection never selects the whole node.
-    if (Nodes.back().Selected)
-      Nodes.back().Selected = SelectionTree::Partial;
   }
 
   // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
@@ -390,6 +404,8 @@
     Node &N = *Stack.top();
     dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
     claimRange(N.ASTNode.getSourceRange(), N.Selected);
+    if (N.Selected == NoTokens)
+      N.Selected = SelectionTree::Unselected;
     if (N.Selected || !N.Children.empty()) {
       // Attach to the tree.
       N.Parent->Children.push_back(&N);
@@ -426,29 +442,75 @@
   void claimRange(SourceRange S, SelectionTree::Selection &Result) {
     if (!S.isValid())
       return;
-    // toHalfOpenFileRange() allows selection of constructs in macro args. e.g:
-    //   #define LOOP_FOREVER(Body) for(;;) { Body }
-    //   void IncrementLots(int &x) {
-    //     LOOP_FOREVER( ++x; )
-    //   }
-    // Selecting "++x" or "x" will do the right thing.
-    auto Range = toHalfOpenFileRange(SM, LangOpts, S);
-    assert(Range && "We should be able to get the File Range");
-    dlog("{1}claimRange: {0}", Range->printToString(SM), indent());
-    auto B = SM.getDecomposedLoc(Range->getBegin());
-    auto E = SM.getDecomposedLoc(Range->getEnd());
-    // Otherwise, nodes in macro expansions can't be selected.
-    if (B.first != SelFile || E.first != SelFile)
-      return;
-    // Attempt to claim the remaining range. If there's nothing to claim, only
-    // children were selected.
-    Claimed.claim(B.second, E.second, Result);
+
+    // We need to iterate over all the expanded tokens that are part of S.
+    // Consider VarDecl created by the macro expansion FLAG(x) -> int x = 0;
+    // Neither S.getBegin() nor S.getEnd() are arg expansions, but x is.
+    // The selection FLAG([[x]]) must partially select the VarDecl.
+    llvm::ArrayRef<syntax::Token> Remaining = Tokens.expandedTokens(S);
+    while (!Remaining.empty()) {
+      // Take consecutive tokens from the same context together for efficiency.
+      FileID FID = SM.getFileID(Remaining.front().location());
+      auto Batch = Remaining.take_while([&](const syntax::Token &T) {
+        return SM.getFileID(T.location()) == FID;
+      });
+      assert(!Batch.empty());
+      Remaining = Remaining.drop_front(Batch.size());
+
+      // There are several possible categories of FileID depending on how the
+      // preprocessor was used to generate these tokens:
+      //   main file, #included file, macro args, macro bodies.
+      // We need to identify the main-file tokens that represent Batch, and
+      // determine whether we want to exclusively claim them. Regular tokens
+      // represent one AST construct, but a macro invocation can represent many.
+
+      // Handle tokens written directly in the main file.
+      // Claim the token exclusively for this node.
+      if (FID == SelFile) {
+        Claimed.claim(SM.getFileOffset(Batch.front().location()),
+                      SM.getFileOffset(Batch.back().location()) +
+                          Batch.back().length(),
+                      Result);
+        continue;
+      }
+
+      // Handle tokens in another file #included into the main file.
+      // Check if the #include is selected, but don't claim it exclusively.
+      if (Batch.front().location().isFileID()) {
+        for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
+             Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
+          if (SM.getFileID(Loc) == SelFile) {
+            Claimed.peek(SM.getFileOffset(Loc), Result);
+            break;
+          }
+        }
+        continue;
+      }
+
+      // (We're definitely in a macro expansion at this point.)
+
+      SourceLocation ArgStart =
+          SM.getTopMacroCallerLoc(Batch.front().location());
+      // Handle tokens that were passed as a macro argument.
+      // Claim the token exclusively for this node.
+      // FIXME: this prevents selecting both occurrences of args used twice.
+      if (SM.getFileID(ArgStart) == SelFile) {
+        SourceLocation ArgEnd =
+            SM.getTopMacroCallerLoc(Batch.back().location());
+        Claimed.claim(SM.getFileOffset(ArgStart),
+                      SM.getFileOffset(ArgEnd) + Batch.back().length(), Result);
+        continue;
+      }
+
+      // Handle tokens produced by non-argument macro expansion.
+      // Check if the macro name is selected, don't claim it exclusively.
+      auto Expansion = SM.getDecomposedExpansionLoc(S.getBegin());
+      if (Expansion.first == SelFile)
+        // FIXME: also check ( and ) for function-like macros.
+        Claimed.peek(Expansion.second, Result);
+    }
     if (Result)
-      dlog("{1}hit selection: {0}",
-           SourceRange(SM.getComposedLoc(B.first, B.second),
-                       SM.getComposedLoc(E.first, E.second))
-               .printToString(SM),
-           indent());
+      dlog("{1}hit selection: {0}", S.printToString(SM), indent());
   }
 
   std::string indent(int Offset = 0) {
@@ -464,6 +526,7 @@
   const PrintingPolicy &PrintPolicy;
 #endif
   std::stack<Node *> Stack;
+  const syntax::TokenBuffer &Tokens;
   SelectedTokens Claimed;
   std::deque<Node> Nodes; // Stable pointers as we add more nodes.
   FileID SelFile;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to