[PATCH] D72581: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Ilya Biryukov via Phabricator via cfe-commits Sun, 12 Jan 2020 21:17:18 -0800

ilya-biryukov created this revision.
ilya-biryukov added a reviewer: sammccall.
Herald added a project: clang.


Same restrictions apply as in the other direction: macro arguments are
not supported yet, only full macro expansions can be mapped.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72581

Files:
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp

Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -57,6 +57,7 @@
 using ::testing::Contains;
 using ::testing::ElementsAre;
 using ::testing::Field;
+using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::Not;
 using ::testing::StartsWith;
@@ -178,10 +179,14 @@
   template <class T, class U, class Eq>
   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
                                  llvm::ArrayRef<T> Range, Eq F) {
-    for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+    assert(Subrange.size() >= 1);
+    if (Range.size() < Subrange.size())
+      return llvm::makeArrayRef(Range.end(), Range.end());
+    for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
+         Begin <= Last; ++Begin) {
       auto It = Begin;
-      for (auto ItSub = Subrange.begin();
-           ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+      for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
+           ++ItSub, ++It) {
         if (!F(*ItSub, *It))
           goto continue_outer;
       }
@@ -664,6 +669,101 @@
               ValueIs(SameRange(findSpelled("not_mapped"))));
 }
 
+TEST_F(TokenBufferTest, ExpandedBySpelled) {
+  recordTokens(R"cpp(
+    a1 a2 a3 b1 b2
+  )cpp");
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+  // Searching for subranges of expanded tokens should give the corresponding
+  // spelled ones.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Test search on simple macro expansions.
+  recordTokens(R"cpp(
+    #define A a1 a2 a3
+    #define B b1 b2
+
+    A split B
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+
+  // Recursive macro invocations.
+  recordTokens(R"cpp(
+    #define ID(x) x
+    #define B b1 b2
+
+    ID(ID(ID(a1) a2 a3)) split ID(B)
+  )cpp");
+
+  EXPECT_THAT(
+      Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
+      ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
+              ElementsAre(SameRange(findExpanded("b1 b2"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(
+                  findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
+              ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
+  // FIXME: these should succeed, but we do not support macro arguments yet.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
+              IsEmpty());
+
+  // Empty macro expansions.
+  recordTokens(R"cpp(
+    #define EMPTY
+    #define ID(X) X
+
+    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+    EMPTY EMPTY ID(4 5 6) split2
+    ID(7 8 9) EMPTY EMPTY
+  )cpp");
+  // Covered by empty expansions on one of both of the sides.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
+              ElementsAre(SameRange(findExpanded("4 5 6"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
+              ElementsAre(SameRange(findExpanded("7 8 9"))));
+  // Including the empty macro expansions on the side.
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
+              ElementsAre(SameRange(findExpanded("1 2 3"))));
+
+  // Empty mappings coming from various directives.
+  recordTokens(R"cpp(
+    #define ID(X) X
+    ID(1)
+    #pragma lalala
+    not_mapped
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
+              IsEmpty());
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
+              IsEmpty());
+
+  // Empty macro expansion.
+  recordTokens(R"cpp(
+    #define EMPTY
+    EMPTY int a = 100;
+  )cpp");
+  EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
+              IsEmpty());
+}
+
 TEST_F(TokenBufferTest, ExpandedTokensForRange) {
   recordTokens(R"cpp(
     #define SIGN(X) X##_washere
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -67,7 +67,8 @@
   auto F = First.range(SM);
   auto L = Last.range(SM);
   assert(F.file() == L.file() && "tokens from different files");
-  assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens");
+  assert((F == L || F.endOffset() <= L.beginOffset()) &&
+         "wrong order of tokens");
   return FileRange(F.file(), F.beginOffset(), L.endOffset());
 }
 
@@ -176,6 +177,72 @@
       /*Mapping*/ nullptr};
 }
 
+const TokenBuffer::Mapping *
+TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
+                                          const syntax::Token *Spelled) {
+  assert(F.SpelledTokens.data() <= Spelled);
+  unsigned SpelledI = Spelled - F.SpelledTokens.data();
+  assert(SpelledI < F.SpelledTokens.size());
+
+  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
+    return M.BeginSpelled <= SpelledI;
+  });
+  if (It == F.Mappings.begin())
+    return nullptr;
+  --It;
+  return &*It;
+}
+
+llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
+  assert(!Spelled.empty());
+  assert(Spelled.front().location().isFileID());
+
+  auto FID = sourceManager().getFileID(Spelled.front().location());
+  auto It = Files.find(FID);
+  assert(It != Files.end());
+
+  auto &File = It->second;
+  assert(File.SpelledTokens.data() <= Spelled.data());
+  assert(Spelled.size() <= File.SpelledTokens.size());
+
+  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
+  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
+  unsigned ExpandedBegin;
+  if (!FrontMapping) {
+    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
+  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
+    if (SpelledFrontI != FrontMapping->BeginSpelled)
+      return {}; // FIXME: support macro arguments.
+    ExpandedBegin = FrontMapping->BeginExpanded;
+  } else {
+    ExpandedBegin =
+        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
+  }
+
+  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
+  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
+  unsigned ExpandedEnd;
+  if (!BackMapping) {
+    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
+  } else if (SpelledBackI < BackMapping->EndSpelled) {
+    if (SpelledBackI + 1 != BackMapping->EndSpelled)
+      return {}; // FIXME: support macro arguments.
+    ExpandedEnd = BackMapping->EndExpanded;
+  } else {
+    ExpandedEnd =
+        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
+  }
+
+  assert(ExpandedBegin < ExpandedTokens.size());
+  assert(ExpandedEnd < ExpandedTokens.size());
+  // Avoid returning empty ranges.
+  if (ExpandedBegin == ExpandedEnd)
+    return {};
+  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
+                             ExpandedTokens.data() + ExpandedEnd)};
+}
+
 llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
   auto It = Files.find(FID);
   assert(It != Files.end());
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -218,6 +218,35 @@
   llvm::Optional<llvm::ArrayRef<syntax::Token>>
   spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
 
+  /// Find the subranges of expanded tokens, corresponding to \p Spelled.
+  /// Some spelled tokens may not be present in the expanded token stream, so
+  /// this function can return an empty vector, e.g. for tokens of macro
+  /// directives or disabled preprocessor branches.
+  ///
+  /// Some spelled tokens can be duplicated in the expanded token stream
+  /// multiple times and this function will return multiple results in those
+  /// cases. This happens when \p Spelled is inside a macro argument.
+  ///
+  /// (!) will return empty vector on tokens from #define body:
+  /// E.g. for the following example:
+  ///
+  ///   #define FIRST(A) f1 A = A f2
+  ///   #define SECOND s
+  ///
+  ///   a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s
+  /// The results would be
+  ///   spelled           => expanded
+  ///   ------------------------
+  ///   #define FIRST     => {}
+  ///   a FIRST(arg)      => {a f1 arg = arg f2}
+  ///   arg               => {arg, arg} // arg #1 is before `=` and arg #2 is
+  ///                                   // after `=` in the expanded tokens.
+  /// FIXME: return correct results on macro arguments. For now, we return an
+  ///        empty list.
+  /// EXPECTS: !Spelled.empty()
+  llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
+  expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
+
   /// An expansion produced by the preprocessor, includes macro expansions and
   /// preprocessor directives. Preprocessor always maps a non-empty range of
   /// spelled tokens to a (possibly empty) range of expanded tokens. Here is a
@@ -303,6 +332,10 @@
   std::pair<const syntax::Token *, const Mapping *>
   spelledForExpandedToken(const syntax::Token *Expanded) const;
 
+  static const Mapping *
+  mappingStartingBeforeSpelled(const MarkedFile &F,
+                               const syntax::Token *Spelled);
+
   /// Token stream produced after preprocessing, conceputally this captures the
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D72581: [Syntax] Add mapping from spelled to expanded tokens for TokenBuffer

Reply via email to