hokein updated this revision to Diff 439705.
hokein added a comment.

Revised to the TokenManager approach:

- Introduce a base token class (TokenManager) for the syntax-tree; the motivation
is to allow using different underlying token implementations in the syntax-tree
- Decouple the syntax-tree from the TokenBuffer:
  - the syntax-tree structure (Tree.h) doesn't depend on the TokenBuffer,
SourceManager, SourceLocation, etc.; it communicates via the TokenManager interfaces;
  - the syntax-tree Arena is simpler; the token-managing responsibility is
transferred to the TokenManager;
  - in the SyntaxTree directory, we implement a TokenBuffer-based
SyntaxTokenManager, which manages all token-related stuff
  - For the mutation/replacement computation APIs, currently they only work on
a TokenBuffer-based token manager. An assertion will be raised if this is not
satisfied. It is an NFC change


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128411/new/

https://reviews.llvm.org/D128411

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/include/clang/Tooling/Syntax/TokenManager.h
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/ComputeReplacements.cpp
  clang/lib/Tooling/Syntax/Synthesis.cpp
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/tools/clang-check/ClangCheck.cpp
  clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
  clang/unittests/Tooling/Syntax/SynthesisTest.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -50,6 +50,7 @@
       new SourceManager(*Diags, *FileMgr);
   std::shared_ptr<CompilerInvocation> Invocation;
   // Set after calling buildTree().
+  std::unique_ptr<syntax::SyntaxTokenManager> TM;
   std::unique_ptr<syntax::TokenBuffer> TB;
   std::unique_ptr<syntax::Arena> Arena;
 };
Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTestBase.cpp
+++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp
@@ -35,13 +35,13 @@
 using namespace clang::syntax;
 
 namespace {
-ArrayRef<syntax::Token> tokens(syntax::Node *N) {
+ArrayRef<syntax::Token> tokens(syntax::Node *N, const SyntaxTokenManager &STM) {
   assert(N->isOriginal() && "tokens of modified nodes are not well-defined");
   if (auto *L = dyn_cast<syntax::Leaf>(N))
-    return llvm::makeArrayRef(L->getToken(), 1);
+    return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1);
   auto *T = cast<syntax::Tree>(N);
-  return llvm::makeArrayRef(T->findFirstLeaf()->getToken(),
-                            T->findLastLeaf()->getToken() + 1);
+  return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()),
+                            STM.getToken(T->findLastLeaf()->getTokenKey()) + 1);
 }
 } // namespace
 
@@ -69,23 +69,26 @@
   class BuildSyntaxTree : public ASTConsumer {
   public:
     BuildSyntaxTree(syntax::TranslationUnit *&Root,
+                    std::unique_ptr<syntax::SyntaxTokenManager> &TM,
                     std::unique_ptr<syntax::TokenBuffer> &TB,
                     std::unique_ptr<syntax::Arena> &Arena,
                     std::unique_ptr<syntax::TokenCollector> Tokens)
-        : Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) {
+        : Root(Root), TM(TM), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) {
       assert(this->Tokens);
     }
 
     void HandleTranslationUnit(ASTContext &Ctx) override {
       TB = std::make_unique<syntax::TokenBuffer>(std::move(*Tokens).consume());
       Tokens = nullptr; // make sure we fail if this gets called twice.
-      Arena = std::make_unique<syntax::Arena>(Ctx.getSourceManager(),
-                                              Ctx.getLangOpts(), *TB);
+      TM = std::make_unique<syntax::SyntaxTokenManager>(Ctx.getSourceManager(),
+                                                        Ctx.getLangOpts(), *TB);
+      Arena = std::make_unique<syntax::Arena>(*TM);
       Root = syntax::buildSyntaxTree(*Arena, Ctx);
     }
 
   private:
     syntax::TranslationUnit *&Root;
+    std::unique_ptr<syntax::SyntaxTokenManager> &TM;
     std::unique_ptr<syntax::TokenBuffer> &TB;
     std::unique_ptr<syntax::Arena> &Arena;
     std::unique_ptr<syntax::TokenCollector> Tokens;
@@ -94,21 +97,23 @@
   class BuildSyntaxTreeAction : public ASTFrontendAction {
   public:
     BuildSyntaxTreeAction(syntax::TranslationUnit *&Root,
+                          std::unique_ptr<syntax::SyntaxTokenManager> &TM,
                           std::unique_ptr<syntax::TokenBuffer> &TB,
                           std::unique_ptr<syntax::Arena> &Arena)
-        : Root(Root), TB(TB), Arena(Arena) {}
+        : Root(Root), TM(TM), TB(TB), Arena(Arena) {}
 
     std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                    StringRef InFile) override {
       // We start recording the tokens, ast consumer will take on the result.
       auto Tokens =
           std::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
-      return std::make_unique<BuildSyntaxTree>(Root, TB, Arena,
+      return std::make_unique<BuildSyntaxTree>(Root, TM, TB, Arena,
                                                std::move(Tokens));
     }
 
   private:
     syntax::TranslationUnit *&Root;
+    std::unique_ptr<syntax::SyntaxTokenManager> &TM;
     std::unique_ptr<syntax::TokenBuffer> &TB;
     std::unique_ptr<syntax::Arena> &Arena;
   };
@@ -149,7 +154,7 @@
   Compiler.setSourceManager(SourceMgr.get());
 
   syntax::TranslationUnit *Root = nullptr;
-  BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena);
+  BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena);
 
   // Action could not be executed but the frontend didn't identify any errors
   // in the code ==> problem in setting up the action.
@@ -163,7 +168,7 @@
 
 syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R,
                                           syntax::Node *Root) {
-  ArrayRef<syntax::Token> Toks = tokens(Root);
+  ArrayRef<syntax::Token> Toks = tokens(Root, *TM);
 
   if (Toks.front().location().isFileID() && Toks.back().location().isFileID() &&
       syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -112,7 +112,7 @@
                                      createLeaf(*Arena, tok::r_paren)};
   for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
     ASSERT_TRUE(Tree->findFirstLeaf() != nullptr);
-    EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren);
+    EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren);
   }
 }
 
@@ -122,7 +122,7 @@
                                      createLeaf(*Arena, tok::r_paren)};
   for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
     ASSERT_TRUE(Tree->findLastLeaf() != nullptr);
-    EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren);
+    EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren);
   }
 }
 
@@ -180,7 +180,7 @@
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
     return N ? "'" +
-                   StringRef(N->dumpTokens(Arena->getSourceManager()))
+                   StringRef(N->dumpTokens(Arena->getTokenManager()))
                        .trim()
                        .str() +
                    "'"
Index: clang/unittests/Tooling/Syntax/SynthesisTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/SynthesisTest.cpp
+++ clang/unittests/Tooling/Syntax/SynthesisTest.cpp
@@ -27,7 +27,7 @@
       return ::testing::AssertionFailure()
              << "Root was not built successfully.";
 
-    auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
+    auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str();
     auto Expected = Dump.trim().str();
     // EXPECT_EQ shows the diff between the two strings if they are different.
     EXPECT_EQ(Expected, Actual);
@@ -175,7 +175,7 @@
 
   auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue);
   EXPECT_TRUE(
-      treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager())));
+      treeDumpEqual(Copy, StatementContinue->dump(Arena->getTokenManager())));
   // FIXME: Test that copy is independent of original, once the Mutations API is
   // more developed.
 }
Index: clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
+++ clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
@@ -26,7 +26,7 @@
     auto ErrorOK = errorOK(Code);
     if (!ErrorOK)
       return ErrorOK;
-    auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
+    auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str();
     // EXPECT_EQ shows the diff between the two strings if they are different.
     EXPECT_EQ(Tree.trim().str(), Actual);
     if (Actual != Tree.trim().str()) {
@@ -59,7 +59,7 @@
       auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root);
       assert(AnnotatedNode);
       auto AnnotatedNodeDump =
-          StringRef(AnnotatedNode->dump(Arena->getSourceManager()))
+          StringRef(AnnotatedNode->dump(Arena->getTokenManager()))
               .trim()
               .str();
       // EXPECT_EQ shows the diff between the two strings if they are different.
Index: clang/tools/clang-check/ClangCheck.cpp
===================================================================
--- clang/tools/clang-check/ClangCheck.cpp
+++ clang/tools/clang-check/ClangCheck.cpp
@@ -157,9 +157,10 @@
         clang::syntax::TokenBuffer TB = std::move(Collector).consume();
         if (TokensDump)
           llvm::outs() << TB.dumpForTests();
-        clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB);
-        llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(
-            AST.getSourceManager());
+        clang::syntax::SyntaxTokenManager TM(AST.getSourceManager(),
+                                             AST.getLangOpts(), TB);
+        clang::syntax::Arena A(TM);
+        llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(TM);
       }
 
     private:
Index: clang/lib/Tooling/Syntax/Tree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -33,25 +33,7 @@
 }
 } // namespace
 
-syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
-                     const TokenBuffer &Tokens)
-    : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
-
-const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
-  return Tokens;
-}
-
-std::pair<FileID, ArrayRef<syntax::Token>>
-syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
-  auto FID = SourceMgr.createFileID(std::move(Input));
-  auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
-  assert(It.second && "duplicate FileID");
-  return {FID, It.first->second};
-}
-
-syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
-  assert(Tok != nullptr);
-}
+syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {}
 
 syntax::Node::Node(NodeKind Kind)
     : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
@@ -190,20 +172,8 @@
 }
 
 namespace {
-static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
-                     const SourceManager &SM) {
-  assert(L);
-  const auto *Token = L->getToken();
-  assert(Token);
-  // Handle 'eof' separately, calling text() on it produces an empty string.
-  if (Token->kind() == tok::eof)
-    OS << "<eof>";
-  else
-    OS << Token->text(SM);
-}
-
 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
-                     const SourceManager &SM, llvm::BitVector IndentMask) {
+                     const syntax::TokenManager &TM, llvm::BitVector IndentMask) {
   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
     if (N->getRole() != syntax::NodeRole::Unknown)
       OS << " " << N->getRole();
@@ -216,7 +186,7 @@
   assert(N);
   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
     OS << "'";
-    dumpLeaf(OS, L, SM);
+    OS << TM.getText(L->getTokenKey());
     OS << "'";
     DumpExtraInfo(N);
     OS << "\n";
@@ -242,25 +212,25 @@
       OS << "|-";
       IndentMask.push_back(true);
     }
-    dumpNode(OS, &It, SM, IndentMask);
+    dumpNode(OS, &It, TM, IndentMask);
     IndentMask.pop_back();
   }
 }
 } // namespace
 
-std::string syntax::Node::dump(const SourceManager &SM) const {
+std::string syntax::Node::dump(const TokenManager &TM) const {
   std::string Str;
   llvm::raw_string_ostream OS(Str);
-  dumpNode(OS, this, SM, /*IndentMask=*/{});
+  dumpNode(OS, this, TM, /*IndentMask=*/{});
   return std::move(OS.str());
 }
 
-std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
+std::string syntax::Node::dumpTokens(const TokenManager &TM) const {
   std::string Storage;
   llvm::raw_string_ostream OS(Storage);
   traverse(this, [&](const syntax::Node *N) {
     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
-      dumpLeaf(OS, L, SM);
+      OS << TM.getText(L->getTokenKey());
       OS << " ";
     }
   });
@@ -297,7 +267,8 @@
            C.getRole() == NodeRole::ListDelimiter);
     if (C.getRole() == NodeRole::ListDelimiter) {
       assert(isa<Leaf>(C));
-      assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
+      // FIXME: can be fixed by adding an tok::Kind in the Leaf node.
+      // assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
     }
   }
 
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -929,3 +929,13 @@
   }
   return Dump;
 }
+
+constexpr llvm::StringLiteral syntax::SyntaxTokenManager::Kind;
+
+std::pair<FileID, ArrayRef<syntax::Token>>
+syntax::SyntaxTokenManager::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
+  auto FID = SM.createFileID(std::move(Input));
+  auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM,LangOpts));
+  assert(It.second && "duplicate FileID");
+  return {FID, It.first->second};
+}
Index: clang/lib/Tooling/Syntax/Synthesis.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Synthesis.cpp
+++ clang/lib/Tooling/Syntax/Synthesis.cpp
@@ -8,6 +8,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Tree.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 
 using namespace clang;
 
@@ -28,14 +29,15 @@
 
   static std::pair<FileID, ArrayRef<Token>>
   lexBuffer(syntax::Arena &A, std::unique_ptr<llvm::MemoryBuffer> Buffer) {
-    return A.lexBuffer(std::move(Buffer));
+    auto& STM = llvm::cast<SyntaxTokenManager>(A.getTokenManager());
+    return STM.lexBuffer(std::move(Buffer));
   }
 };
 
 // FIXME: `createLeaf` is based on `syntax::tokenize` internally, as such it
 // doesn't support digraphs or line continuations.
 syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K,
-                                        StringRef Spelling) {
+                                        StringRef Spelling) {                          
   auto Tokens =
       FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBufferCopy(Spelling))
           .second;
@@ -209,11 +211,12 @@
 
 syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A,
                                                      const syntax::Node *N) {
+  const auto& STM = llvm::cast<SyntaxTokenManager>(A.getTokenManager());
   if (const auto *L = dyn_cast<syntax::Leaf>(N))
     // `L->getToken()` gives us the expanded token, thus we implicitly expand
     // any macros here.
-    return createLeaf(A, L->getToken()->kind(),
-                      L->getToken()->text(A.getSourceManager()));
+    return createLeaf(A, STM.getToken(L->getTokenKey())->kind(),
+                       STM.getText(L->getTokenKey()));
 
   const auto *T = cast<syntax::Tree>(N);
   std::vector<std::pair<syntax::Node *, syntax::NodeRole>> Children;
Index: clang/lib/Tooling/Syntax/ComputeReplacements.cpp
===================================================================
--- clang/lib/Tooling/Syntax/ComputeReplacements.cpp
+++ clang/lib/Tooling/Syntax/ComputeReplacements.cpp
@@ -8,6 +8,7 @@
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Mutations.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/Support/Error.h"
 
 using namespace clang;
@@ -16,10 +17,12 @@
 using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>,
                                                 bool /*IsOriginal*/)>;
 /// Enumerates spans of tokens from the tree consecutively laid out in memory.
-void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
+void enumerateTokenSpans(const syntax::Tree *Root,
+                         const syntax::SyntaxTokenManager &STM,
+                         ProcessTokensFn Callback) {
   struct Enumerator {
-    Enumerator(ProcessTokensFn Callback)
-        : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
+    Enumerator(const syntax::SyntaxTokenManager &STM, ProcessTokensFn Callback)
+        : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
           Callback(Callback) {}
 
     void run(const syntax::Tree *Root) {
@@ -39,7 +42,8 @@
       }
 
       auto *L = cast<syntax::Leaf>(N);
-      if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) {
+      if (SpanEnd == STM.getToken(L->getTokenKey()) &&
+          SpanIsOriginal == L->isOriginal()) {
         // Extend the current span.
         ++SpanEnd;
         return;
@@ -48,24 +52,25 @@
       if (SpanBegin)
         Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal);
       // Start recording a new span.
-      SpanBegin = L->getToken();
+      SpanBegin = STM.getToken(L->getTokenKey());
       SpanEnd = SpanBegin + 1;
       SpanIsOriginal = L->isOriginal();
     }
 
+    const syntax::SyntaxTokenManager &STM;
     const syntax::Token *SpanBegin;
     const syntax::Token *SpanEnd;
     bool SpanIsOriginal;
     ProcessTokensFn Callback;
   };
 
-  return Enumerator(Callback).run(Root);
+  return Enumerator(STM, Callback).run(Root);
 }
 
-syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
+syntax::FileRange rangeOfExpanded(const syntax::SyntaxTokenManager &STM,
                                   llvm::ArrayRef<syntax::Token> Expanded) {
-  const auto &Buffer = A.getTokenBuffer();
-  const auto &SM = A.getSourceManager();
+  const auto &Buffer = STM.getTokenBuffer();
+  const auto &SM = STM.getSourceManager();
 
   // Check that \p Expanded actually points into expanded tokens.
   assert(Buffer.expandedTokens().begin() <= Expanded.begin());
@@ -85,8 +90,9 @@
 tooling::Replacements
 syntax::computeReplacements(const syntax::Arena &A,
                             const syntax::TranslationUnit &TU) {
-  const auto &Buffer = A.getTokenBuffer();
-  const auto &SM = A.getSourceManager();
+  const auto &STM = llvm::cast<SyntaxTokenManager>(A.getTokenManager());
+  const auto &Buffer = STM.getTokenBuffer();
+  const auto &SM = STM.getSourceManager();
 
   tooling::Replacements Replacements;
   // Text inserted by the replacement we are building now.
@@ -95,13 +101,12 @@
     if (ReplacedRange.empty() && Replacement.empty())
       return;
     llvm::cantFail(Replacements.add(tooling::Replacement(
-        SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement)));
+        SM, rangeOfExpanded(STM, ReplacedRange).toCharRange(SM), Replacement)));
     Replacement = "";
   };
-
   const syntax::Token *NextOriginal = Buffer.expandedTokens().begin();
   enumerateTokenSpans(
-      &TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
+      &TU, STM, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
         if (!IsOriginal) {
           Replacement +=
               syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM);
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -365,21 +365,23 @@
 /// Call finalize() to finish building the tree and consume the root node.
 class syntax::TreeBuilder {
 public:
-  TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
-    for (const auto &T : Arena.getTokenBuffer().expandedTokens())
+  TreeBuilder(syntax::Arena &Arena)
+      : Arena(Arena), STM(cast<SyntaxTokenManager>(Arena.getTokenManager())),
+        Pending(Arena, STM) {
+    for (const auto &T : STM.getTokenBuffer().expandedTokens())
       LocationToToken.insert({T.location(), &T});
   }
 
   llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); }
   const SourceManager &sourceManager() const {
-    return Arena.getSourceManager();
+    return STM.getSourceManager();
   }
 
   /// Populate children for \p New node, assuming it covers tokens from \p
   /// Range.
   void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) {
     assert(New);
-    Pending.foldChildren(Arena, Range, New);
+    Pending.foldChildren(STM, Range, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -392,7 +394,7 @@
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
                 NestedNameSpecifierLoc From) {
     assert(New);
-    Pending.foldChildren(Arena, Range, New);
+    Pending.foldChildren(STM, Range, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -403,7 +405,7 @@
                 ASTPtr From) {
     assert(New);
     auto ListRange = Pending.shrinkToFitList(SuperRange);
-    Pending.foldChildren(Arena, ListRange, New);
+    Pending.foldChildren(STM, ListRange, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -434,12 +436,12 @@
 
   /// Finish building the tree and consume the root node.
   syntax::TranslationUnit *finalize() && {
-    auto Tokens = Arena.getTokenBuffer().expandedTokens();
+    auto Tokens = STM.getTokenBuffer().expandedTokens();
     assert(!Tokens.empty());
     assert(Tokens.back().kind() == tok::eof);
 
     // Build the root of the tree, consuming all the children.
-    Pending.foldChildren(Arena, Tokens.drop_back(),
+    Pending.foldChildren(STM, Tokens.drop_back(),
                          new (Arena.getAllocator()) syntax::TranslationUnit);
 
     auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
@@ -464,7 +466,7 @@
     assert(First.isValid());
     assert(Last.isValid());
     assert(First == Last ||
-           Arena.getSourceManager().isBeforeInTranslationUnit(First, Last));
+           STM.getSourceManager().isBeforeInTranslationUnit(First, Last));
     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
   }
 
@@ -564,15 +566,15 @@
   ///
   /// Ensures that added nodes properly nest and cover the whole token stream.
   struct Forest {
-    Forest(syntax::Arena &A) {
-      assert(!A.getTokenBuffer().expandedTokens().empty());
-      assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof);
+    Forest(syntax::Arena &A, const syntax::SyntaxTokenManager &STM) {
+      assert(!STM.getTokenBuffer().expandedTokens().empty());
+      assert(STM.getTokenBuffer().expandedTokens().back().kind() == tok::eof);
       // Create all leaf nodes.
       // Note that we do not have 'eof' in the tree.
-      for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) {
+      for (const auto &T : STM.getTokenBuffer().expandedTokens().drop_back()) {
         auto *L = new (A.getAllocator()) syntax::Leaf(&T);
         L->Original = true;
-        L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value();
+        L->CanModify = STM.getTokenBuffer().spelledForExpanded(T).has_value();
         Trees.insert(Trees.end(), {&T, L});
       }
     }
@@ -620,7 +622,7 @@
     }
 
     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
-    void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens,
+    void foldChildren(const syntax::SyntaxTokenManager &STM, ArrayRef<syntax::Token> Tokens,
                       syntax::Tree *Node) {
       // Attach children to `Node`.
       assert(Node->getFirstChild() == nullptr && "node already has children");
@@ -646,7 +648,7 @@
       // Mark that this node came from the AST and is backed by the source code.
       Node->Original = true;
       Node->CanModify =
-          A.getTokenBuffer().spelledForExpanded(Tokens).has_value();
+          STM.getTokenBuffer().spelledForExpanded(Tokens).has_value();
 
       Trees.erase(BeginChildren, EndChildren);
       Trees.insert({FirstToken, Node});
@@ -660,18 +662,18 @@
       return Root;
     }
 
-    std::string str(const syntax::Arena &A) const {
+    std::string str(const syntax::SyntaxTokenManager &STM) const {
       std::string R;
       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
         unsigned CoveredTokens =
             It != Trees.end()
                 ? (std::next(It)->first - It->first)
-                : A.getTokenBuffer().expandedTokens().end() - It->first;
+                : STM.getTokenBuffer().expandedTokens().end() - It->first;
 
         R += std::string(
             formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(),
-                    It->first->text(A.getSourceManager()), CoveredTokens));
-        R += It->second->dump(A.getSourceManager());
+                    It->first->text(STM.getSourceManager()), CoveredTokens));
+        R += It->second->dump(STM);
       }
       return R;
     }
@@ -684,9 +686,10 @@
   };
 
   /// For debugging purposes.
-  std::string str() { return Pending.str(Arena); }
+  std::string str() { return Pending.str(STM); }
 
   syntax::Arena &Arena;
+  SyntaxTokenManager& STM;
   /// To quickly find tokens by their start location.
   llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken;
   Forest Pending;
@@ -1718,7 +1721,7 @@
     markExprChild(ChildExpr, NodeRole::Expression);
     ChildNode = new (allocator()) syntax::ExpressionStatement;
     // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
-    Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
+    Pending.foldChildren(STM, getStmtRange(Child), ChildNode);
   } else {
     ChildNode = Mapping.find(Child);
   }
Index: clang/include/clang/Tooling/Syntax/Tree.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tree.h
+++ clang/include/clang/Tooling/Syntax/Tree.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
-//   - leaf nodes correspond to a token in the expanded token stream,
+//   - leaf nodes correspond to a token key in the token manager
 //   - tree nodes correspond to language grammar constructs.
 //
 // The tree is initially built from an AST. Each node of a newly built tree
@@ -21,11 +21,8 @@
 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
 
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
-#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/TokenManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/iterator.h"
@@ -36,33 +33,18 @@
 namespace clang {
 namespace syntax {
 
-/// A memory arena for syntax trees. Also tracks the underlying token buffers,
-/// source manager, etc.
+/// A memory arena for syntax trees.
 class Arena {
 public:
-  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
-        const TokenBuffer &Tokens);
-
-  const SourceManager &getSourceManager() const { return SourceMgr; }
-  const LangOptions &getLangOptions() const { return LangOpts; }
-
-  const TokenBuffer &getTokenBuffer() const;
+  Arena(TokenManager& TokenMgr) : TokenMgr(TokenMgr) {}
   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
 
-private:
-  /// Add \p Buffer to the underlying source manager, tokenize it and store the
-  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
-  /// that were not written in user code.
-  std::pair<FileID, ArrayRef<Token>>
-  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
-  friend class FactoryImpl;
+  const TokenManager &getTokenManager() const { return TokenMgr; }
+  TokenManager &getTokenManager() { return TokenMgr; }
 
 private:
-  SourceManager &SourceMgr;
-  const LangOptions &LangOpts;
-  const TokenBuffer &Tokens;
-  /// IDs and storage for additional tokenized files.
-  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
+  // Manage all token-related stuff.
+  TokenManager& TokenMgr;
   /// Keeps all the allocated nodes and their intermediate data structures.
   llvm::BumpPtrAllocator Allocator;
 };
@@ -122,9 +104,9 @@
   Node *getPreviousSibling() { return PreviousSibling; }
 
   /// Dumps the structure of a subtree. For debugging and testing purposes.
-  std::string dump(const SourceManager &SM) const;
+  std::string dump(const TokenManager &SM) const;
   /// Dumps the tokens forming this subtree.
-  std::string dumpTokens(const SourceManager &SM) const;
+  std::string dumpTokens(const TokenManager &SM) const;
 
   /// Asserts invariants on this node of the tree and its immediate children.
   /// Will not recurse into the subtree. No-op if NDEBUG is set.
@@ -153,16 +135,15 @@
   unsigned CanModify : 1;
 };
 
-/// A leaf node points to a single token inside the expanded token stream.
 class Leaf final : public Node {
 public:
-  Leaf(const Token *T);
+  Leaf(TokenManager::Key K);
   static bool classof(const Node *N);
 
-  const Token *getToken() const { return Tok; }
+  TokenManager::Key getTokenKey() const { return K; }
 
 private:
-  const Token *Tok;
+  TokenManager::Key K;
 };
 
 /// A node that has children and represents a syntactic language construct.
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -33,6 +33,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Token.h"
+#include "clang/Tooling/Syntax/TokenManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
@@ -457,6 +458,53 @@
   CollectPPExpansions *Collector;
 };
 
+/// A TokenBuffer-powered token manager.
+/// It tracks the underlying token buffers, source manager, etc.
+class SyntaxTokenManager : public TokenManager {
+public:
+  SyntaxTokenManager(SourceManager &SourceMgr, const LangOptions &LangOpts,
+                     const TokenBuffer &Tokens)
+      : SM(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
+ 
+  static bool classof(const TokenManager *N) { return N->kind() == Kind; }
+  llvm::StringLiteral kind() const override { return Kind; }
+
+  llvm::StringRef getText(Key I) const override {
+    const auto *Token = getToken(I);
+    assert(Token);
+    // Handle 'eof' separately, calling text() on it produces an empty string.
+    if (Token->kind() == tok::eof)
+      return "<eof>";
+
+    return Token->text(SM);
+  }
+
+  const syntax::Token *getToken(Key I) const {
+    return reinterpret_cast<const syntax::Token *>(I);
+  }
+  SourceManager &getSourceManager() { return SM; }
+  const SourceManager &getSourceManager() const { return SM; }
+  const TokenBuffer &getTokenBuffer() const { return Tokens; }
+
+private:
+  // This manager is powered by the TokenBuffer.
+  static constexpr llvm::StringLiteral Kind = "TokenBuffer";
+
+  /// Add \p Buffer to the underlying source manager, tokenize it and store the
+  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
+  /// that were not written in user code.
+  std::pair<FileID, ArrayRef<Token>>
+  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
+  friend class FactoryImpl;
+
+  SourceManager &SM;
+  const LangOptions &LangOpts;
+  const TokenBuffer &Tokens;
+  /// IDs and storage for additional tokenized files.
+  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
+};
+
+
 } // namespace syntax
 } // namespace clang
 
Index: clang/include/clang/Tooling/Syntax/TokenManager.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/TokenManager.h
@@ -0,0 +1,43 @@
+//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Token interfaces for the syntax-tree, decoupling the syntax-tree from
+// the TokenBuffer. It enables producers (e.g. clang pseudoparser) to produce a
+// syntax-tree with different token implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace syntax {
+
+/// Base token interfaces for the syntax-tree.
+class TokenManager {
+public:
+  /// Describes what the exact class kind of the TokenManager is.
+  virtual llvm::StringLiteral kind() const = 0;
+
+  /// A key to identify a specific token. The token concept depends on the
+  /// underlying implementation -- it can be a spelled token from the original
+  /// source file or an expanded token.
+  /// The syntax-tree Leaf node holds a Key.
+  using Key = const void *;
+  /// Gets the text of token identified by the key.
+  virtual llvm::StringRef getText(Key K) const = 0;
+
+  // FIXME: add an interface for getting token kind.
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -23,7 +23,6 @@
 
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Token.h"
-#include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to