[PATCH] D77209: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Marcel Hlopko via Phabricator via cfe-commits Wed, 01 Apr 2020 04:25:38 -0700

hlopko created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


Same restrictions apply as in the other direction: macro arguments are
not supported yet, only full macro expansions can be mapped.

Taking over from https://reviews.llvm.org/D72581.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77209

Files:
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp

Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -57,6 +57,7 @@
 using ::testing::Contains;
 using ::testing::ElementsAre;
 using ::testing::Field;
+using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::Not;
 using ::testing::Pointee;
@@ -185,10 +186,14 @@
   template <class T, class U, class Eq>
   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
                                  llvm::ArrayRef<T> Range, Eq F) {
-    for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+    assert(Subrange.size() >= 1);
+    if (Range.size() < Subrange.size())
+      return llvm::makeArrayRef(Range.end(), Range.end());
+    for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
+         Begin <= Last; ++Begin) {
       auto It = Begin;
-      for (auto ItSub = Subrange.begin();
-           ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+      for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
+           ++ItSub, ++It) {
         if (!F(*ItSub, *It))
           goto continue_outer;
       }
@@ -889,4 +894,100 @@
   ASSERT_EQ(Code.points().size(), 8u);
 }
 
+TEST_F(TokenBufferTest, ExpandedBySpelled) {
+  recordTokens(R"cpp(
+    a1 a2 a3 b1 b2
+  )cpp");
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+  // Searching for subranges of expanded tokens should give the corresponding
+  // spelled ones.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Test search on simple macro expansions.
+  recordTokens(R"cpp(
+    #define A a1 a2 a3
+    #define B b1 b2
+
+    A split B
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Recursive macro invocations.
+  recordTokens(R"cpp(
+    #define ID(x) x
+    #define B b1 b2
+
+    ID(ID(ID(a1) a2 a3)) split ID(B)
+  )cpp");
+
+  EXPECT_THAT(
+      Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
+      ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(
+                  findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  // FIXME: these should succeed, but we do not support macro arguments yet.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
+              IsEmpty());
+
+  // Empty macro expansions.
+  recordTokens(R"cpp(
+    #define EMPTY
+    #define ID(X) X
+
+    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+    EMPTY EMPTY ID(4 5 6) split2
+    ID(7 8 9) EMPTY EMPTY
+  )cpp");
+  // Covered by empty expansions on one of both of the sides.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
+              ElementsAre(SameRange(findExpanded("4 5 6"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
+              ElementsAre(SameRange(findExpanded("7 8 9"))));
+  // Including the empty macro expansions on the side.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(
+      Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
+      ElementsAre(SameRange(findExpanded("1 2 3"))));
+
+  // Empty mappings coming from various directives.
+  recordTokens(R"cpp(
+    #define ID(X) X
+    ID(1)
+    #pragma lalala
+    not_mapped
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
+              IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
+              IsEmpty());
+
+  // Empty macro expansion.
+  recordTokens(R"cpp(
+    #define EMPTY
+    EMPTY int a = 100;
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
+              IsEmpty());
+}
+
 } // namespace
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -213,19 +213,90 @@
   // Our token could only be produced by the previous mapping.
   if (It == File.Mappings.begin()) {
     // No previous mapping, no need to modify offsets.
-    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
+    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
+            /*Mapping=*/nullptr};
   }
   --It; // 'It' now points to last mapping that started before our token.
 
   // Check if the token is part of the mapping.
   if (ExpandedIndex < It->EndExpanded)
-    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
+    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
 
   // Not part of the mapping, use the index from previous mapping to compute the
   // corresponding spelled token.
   return {
       &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
-      /*Mapping*/ nullptr};
+      /*Mapping=*/nullptr};
+}
+
+const TokenBuffer::Mapping *
+TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
+                                          const syntax::Token *Spelled) {
+  assert(F.SpelledTokens.data() <= Spelled);
+  unsigned SpelledI = Spelled - F.SpelledTokens.data();
+  assert(SpelledI < F.SpelledTokens.size());
+
+  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
+    return M.BeginSpelled <= SpelledI;
+  });
+  if (It == F.Mappings.begin())
+    return nullptr;
+  --It;
+  return &*It;
+}
+
+llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
+  if (Spelled.empty())
+    return {};
+  assert(Spelled.front().location().isFileID());
+
+  auto FID = sourceManager().getFileID(Spelled.front().location());
+  auto It = Files.find(FID);
+  assert(It != Files.end());
+
+  const MarkedFile &File = It->second;
+  // assert that Spelled is a subarray of File.SpelledTokens.
+  assert(File.SpelledTokens.data() <= Spelled.data());
+  assert(Spelled.size() <= File.SpelledTokens.size());
+
+  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
+  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
+  unsigned ExpandedBegin;
+  if (!FrontMapping) {
+    // Requested spelled tokens are mapped in the first mapping.
+    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
+  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
+    // Requested spelled tokens start in the FrontMapping.
+    if (SpelledFrontI != FrontMapping->BeginSpelled)
+      return {}; // FIXME: support macro arguments.
+    ExpandedBegin = FrontMapping->BeginExpanded;
+  } else {
+    ExpandedBegin =
+        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
+  }
+
+  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
+  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
+  unsigned ExpandedEnd;
+  if (!BackMapping) {
+    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
+  } else if (SpelledBackI < BackMapping->EndSpelled) {
+    if (SpelledBackI + 1 != BackMapping->EndSpelled)
+      return {}; // FIXME: support macro arguments.
+    ExpandedEnd = BackMapping->EndExpanded;
+  } else {
+    ExpandedEnd =
+        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
+  }
+
+  assert(ExpandedBegin < ExpandedTokens.size());
+  assert(ExpandedEnd < ExpandedTokens.size());
+  // Avoid returning empty ranges.
+  if (ExpandedBegin == ExpandedEnd)
+    return {};
+  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
+                             ExpandedTokens.data() + ExpandedEnd)};
 }
 
 llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -228,6 +228,35 @@
   llvm::Optional<llvm::ArrayRef<syntax::Token>>
   spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
 
+  /// Find the subranges of expanded tokens, corresponding to \p Spelled.
+  /// Some spelled tokens may not be present in the expanded token stream, so
+  /// this function can return an empty vector, e.g. for tokens of macro
+  /// directives or disabled preprocessor branches.
+  ///
+  /// Some spelled tokens can be duplicated in the expanded token stream
+  /// multiple times and this function will return multiple results in those
+  /// cases. This happens when \p Spelled is inside a macro argument.
+  ///
+  /// FIXME: return correct results on macro arguments. For now, we return an
+  ///        empty list.
+  ///
+  /// (!) will return empty vector on tokens from #define body:
+  /// E.g. for the following example:
+  ///
+  ///   #define FIRST(A) f1 A = A f2
+  ///   #define SECOND s
+  ///
+  ///   a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s
+  /// The results would be
+  ///   spelled           => expanded
+  ///   ------------------------
+  ///   #define FIRST     => {}
+  ///   a FIRST(arg)      => {a f1 arg = arg f2}
+  ///   arg               => {arg, arg} // arg #1 is before `=` and arg #2 is
+  ///                                   // after `=` in the expanded tokens.
+  llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+  expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
+
   /// An expansion produced by the preprocessor, includes macro expansions and
   /// preprocessor directives. Preprocessor always maps a non-empty range of
   /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a
@@ -317,6 +346,10 @@
   std::pair<const syntax::Token *, const Mapping *>
   spelledForExpandedToken(const syntax::Token *Expanded) const;
 
+  static const Mapping *
+  mappingStartingBeforeSpelled(const MarkedFile &F,
+                               const syntax::Token *Spelled);
+
   /// Token stream produced after preprocessing, conceputally this captures the
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77209: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Reply via email to