ilya-biryukov updated this revision to Diff 228023.
ilya-biryukov marked 20 inline comments as done.
ilya-biryukov added a comment.

- Group Traverse* and Walk* together
- s/RAT/RAV
- Add a comment about nullability of the accessors
- Name functions for consuming statements and expressions differently


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -41,8 +41,8 @@
 
       void HandleTranslationUnit(ASTContext &Ctx) override {
         Arena = std::make_unique<syntax::Arena>(Ctx.getSourceManager(),
-                                                 Ctx.getLangOpts(),
-                                                 std::move(*Tokens).consume());
+                                                Ctx.getLangOpts(),
+                                                std::move(*Tokens).consume());
         Tokens = nullptr; // make sure we fail if this gets called twice.
         Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl());
       }
@@ -65,7 +65,7 @@
         auto Tokens =
             std::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
         return std::make_unique<BuildSyntaxTree>(Root, Arena,
-                                                  std::move(Tokens));
+                                                 std::move(Tokens));
       }
 
     private:
@@ -136,18 +136,315 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-TopLevelDeclaration
   |-void
   |-foo
   |-(
   |-)
   `-CompoundStatement
-    |-2: {
-    `-3: }
+    |-{
+    `-}
 )txt"},
-  };
+      // if.
+      {
+          R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+        )cpp",
+          R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-main
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-IfStatement
+    | |-if
+    | |-(
+    | |-UnknownExpression
+    | | `-true
+    | |-)
+    | `-CompoundStatement
+    |   |-{
+    |   `-}
+    |-IfStatement
+    | |-if
+    | |-(
+    | |-UnknownExpression
+    | | `-true
+    | |-)
+    | |-CompoundStatement
+    | | |-{
+    | | `-}
+    | |-else
+    | `-IfStatement
+    |   |-if
+    |   |-(
+    |   |-UnknownExpression
+    |   | `-false
+    |   |-)
+    |   `-CompoundStatement
+    |     |-{
+    |     `-}
+    `-}
+        )txt"},
+      // for.
+      {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-ForStatement
+    | |-for
+    | |-(
+    | |-;
+    | |-;
+    | |-)
+    | `-CompoundStatement
+    |   |-{
+    |   `-}
+    `-}
+        )txt"},
+      // declaration statement.
+      {"void test() { int a = 10; }",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-DeclarationStatement
+    | |-int
+    | |-a
+    | |-=
+    | |-10
+    | `-;
+    `-}
+)txt"},
+      {"void test() { ; }", R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-EmptyStatement
+    | `-;
+    `-}
+)txt"},
+      // switch, case and default.
+      {R"cpp(
+void test() {
+  switch (true) {
+    case 0:
+    default:;
+  }
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-SwitchStatement
+    | |-switch
+    | |-(
+    | |-UnknownExpression
+    | | `-true
+    | |-)
+    | `-CompoundStatement
+    |   |-{
+    |   |-CaseStatement
+    |   | |-case
+    |   | |-UnknownExpression
+    |   | | `-0
+    |   | |-:
+    |   | `-DefaultStatement
+    |   |   |-default
+    |   |   |-:
+    |   |   `-EmptyStatement
+    |   |     `-;
+    |   `-}
+    `-}
+)txt"},
+      // while.
+      {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-WhileStatement
+    | |-while
+    | |-(
+    | |-UnknownExpression
+    | | `-true
+    | |-)
+    | `-CompoundStatement
+    |   |-{
+    |   |-ContinueStatement
+    |   | |-continue
+    |   | `-;
+    |   |-BreakStatement
+    |   | |-break
+    |   | `-;
+    |   `-}
+    `-}
+)txt"},
+      // return.
+      {R"cpp(
+int test() { return 1; }
+      )cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-int
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-ReturnStatement
+    | |-return
+    | |-UnknownExpression
+    | | `-1
+    | `-;
+    `-}
+)txt"},
+      // Range-based for.
+      {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+      )cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-DeclarationStatement
+    | |-int
+    | |-a
+    | |-[
+    | |-3
+    | |-]
+    | `-;
+    |-RangeBasedForStatement
+    | |-for
+    | |-(
+    | |-int
+    | |-x
+    | |-:
+    | |-UnknownExpression
+    | | `-a
+    | |-)
+    | `-EmptyStatement
+    |   `-;
+    `-}
+       )txt"},
+      // Unhandled statements should end up as 'unknown statement'.
+      // This example uses a 'label statement', which does not yet have a syntax
+      // counterpart.
+      {"void main() { foo: return 100; }", R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-main
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-UnknownStatement
+    | |-foo
+    | |-:
+    | `-ReturnStatement
+    |   |-return
+    |   |-UnknownExpression
+    |   | `-100
+    |   `-;
+    `-}
+)txt"},
+      // expressions should be wrapped in 'ExpressionStatement' when they appear
+      // in a statement position.
+      {R"cpp(
+void test() {
+  test();
+  if (true) test(); else test();
+}
+    )cpp",
+       R"txt(
+*: TranslationUnit
+`-TopLevelDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-ExpressionStatement
+    | |-UnknownExpression
+    | | |-test
+    | | |-(
+    | | `-)
+    | `-;
+    |-IfStatement
+    | |-if
+    | |-(
+    | |-UnknownExpression
+    | | `-true
+    | |-)
+    | |-ExpressionStatement
+    | | |-UnknownExpression
+    | | | |-test
+    | | | |-(
+    | | | `-)
+    | | `-;
+    | |-else
+    | `-ExpressionStatement
+    |   |-UnknownExpression
+    |   | |-test
+    |   | |-(
+    |   | `-)
+    |   `-;
+    `-}
+)txt"}};
 
   for (const auto &T : Cases) {
     auto *Root = buildTree(T.first);
Index: clang/lib/Tooling/Syntax/Tree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -85,13 +85,10 @@
 
 static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N,
                      const syntax::Arena &A, std::vector<bool> IndentMask) {
-  if (N->role() != syntax::NodeRole::Unknown) {
-    // FIXME: print the symbolic name of a role.
-    if (N->role() == syntax::NodeRole::Detached)
-      OS << "*: ";
-    else
-      OS << static_cast<int>(N->role()) << ": ";
-  }
+  if (N->role() == syntax::NodeRole::Detached)
+    OS << "*: ";
+  // FIXME: find a nice way to print other roles.
+
   if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) {
     dumpTokens(OS, *L->token(), A.sourceManager());
     OS << "\n";
Index: clang/lib/Tooling/Syntax/Nodes.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Nodes.cpp
+++ clang/lib/Tooling/Syntax/Nodes.cpp
@@ -18,18 +18,199 @@
     return OS << "TranslationUnit";
   case NodeKind::TopLevelDeclaration:
     return OS << "TopLevelDeclaration";
+  case NodeKind::UnknownExpression:
+    return OS << "UnknownExpression";
+  case NodeKind::UnknownStatement:
+    return OS << "UnknownStatement";
+  case NodeKind::DeclarationStatement:
+    return OS << "DeclarationStatement";
+  case NodeKind::EmptyStatement:
+    return OS << "EmptyStatement";
+  case NodeKind::SwitchStatement:
+    return OS << "SwitchStatement";
+  case NodeKind::CaseStatement:
+    return OS << "CaseStatement";
+  case NodeKind::DefaultStatement:
+    return OS << "DefaultStatement";
+  case NodeKind::IfStatement:
+    return OS << "IfStatement";
+  case NodeKind::ForStatement:
+    return OS << "ForStatement";
+  case NodeKind::WhileStatement:
+    return OS << "WhileStatement";
+  case NodeKind::ContinueStatement:
+    return OS << "ContinueStatement";
+  case NodeKind::BreakStatement:
+    return OS << "BreakStatement";
+  case NodeKind::ReturnStatement:
+    return OS << "ReturnStatement";
+  case NodeKind::RangeBasedForStatement:
+    return OS << "RangeBasedForStatement";
+  case NodeKind::ExpressionStatement:
+    return OS << "ExpressionStatement";
   case NodeKind::CompoundStatement:
     return OS << "CompoundStatement";
   }
   llvm_unreachable("unknown node kind");
 }
 
+llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
+  switch (R) {
+  case syntax::NodeRole::Detached:
+    return OS << "Detached";
+  case syntax::NodeRole::Unknown:
+    return OS << "Unknown";
+  case syntax::NodeRole::OpenParen:
+    return OS << "OpenParen";
+  case syntax::NodeRole::CloseParen:
+    return OS << "CloseParen";
+  case syntax::NodeRole::IntroducerKeyword:
+    return OS << "IntroducerKeyword";
+  case syntax::NodeRole::BodyStatement:
+    return OS << "BodyStatement";
+  case syntax::NodeRole::CaseStatement_value:
+    return OS << "CaseStatement_value";
+  case syntax::NodeRole::IfStatement_thenStatement:
+    return OS << "IfStatement_thenStatement";
+  case syntax::NodeRole::IfStatement_elseKeyword:
+    return OS << "IfStatement_elseKeyword";
+  case syntax::NodeRole::IfStatement_elseStatement:
+    return OS << "IfStatement_elseStatement";
+  case syntax::NodeRole::ReturnStatement_value:
+    return OS << "ReturnStatement_value";
+  case syntax::NodeRole::ExpressionStatement_expression:
+    return OS << "ExpressionStatement_expression";
+  case syntax::NodeRole::CompoundStatement_statement:
+    return OS << "CompoundStatement_statement";
+  }
+  llvm_unreachable("invalid role");
+}
+
+syntax::Leaf *syntax::SwitchStatement::switchKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::SwitchStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::CaseStatement::caseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::CaseStatement::value() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::CaseStatement_value));
+}
+
+syntax::Statement *syntax::CaseStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::DefaultStatement::defaultKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::DefaultStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::ifKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::thenStatement() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::IfStatement_thenStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::elseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IfStatement_elseKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::elseStatement() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::IfStatement_elseStatement));
+}
+
+syntax::Leaf *syntax::ForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::ForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::WhileStatement::whileKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::WhileStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::ContinueStatement::continueKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::BreakStatement::breakKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::ReturnStatement::returnKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::ReturnStatement::value() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::ReturnStatement_value));
+}
+
+syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::RangeBasedForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Expression *syntax::ExpressionStatement::expression() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::ExpressionStatement_expression));
+}
+
 syntax::Leaf *syntax::CompoundStatement::lbrace() {
   return llvm::cast_or_null<syntax::Leaf>(
-      findChild(NodeRole::CompoundStatement_lbrace));
+      findChild(syntax::NodeRole::OpenParen));
+}
+
+std::vector<syntax::Statement *> syntax::CompoundStatement::statements() {
+  std::vector<syntax::Statement *> Children;
+  for (auto *C = firstChild(); C; C = C->nextSibling()) {
+    if (C->role() == syntax::NodeRole::CompoundStatement_statement)
+      Children.push_back(llvm::cast<syntax::Statement>(C));
+  }
+  return Children;
 }
 
 syntax::Leaf *syntax::CompoundStatement::rbrace() {
   return llvm::cast_or_null<syntax::Leaf>(
-      findChild(NodeRole::CompoundStatement_rbrace));
+      findChild(syntax::NodeRole::CloseParen));
 }
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -27,6 +27,8 @@
 
 using namespace clang;
 
+static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
+
 /// A helper class for constructing the syntax tree while traversing a clang
 /// AST.
 ///
@@ -52,6 +54,15 @@
   /// Range.
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
 
+  /// Mark the \p Child node with a corresponding \p Role. All marked children
+  /// should be consumed by foldNode.
+  /// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
+  ///     wraps expressions into expression statement.
+  void markStmtChild(Stmt *Child, NodeRole Role);
+  /// Should be called for expressions in non-statement position to avoid
+  /// wrapping into expression statement.
+  void markExprChild(Expr *Child, NodeRole Role);
+
   /// Set role for a token starting at \p Loc.
   void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
 
@@ -83,8 +94,23 @@
   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
     return getRange(D->getBeginLoc(), D->getEndLoc());
   }
-  llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
-    return getRange(S->getBeginLoc(), S->getEndLoc());
+  llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
+    return getRange(E->getBeginLoc(), E->getEndLoc());
+  }
+  /// Find the adjusted range for the statement, consuming the trailing
+  /// semicolon when needed.
+  llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {
+    auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
+    if (isa<CompoundStmt>(S))
+      return Tokens;
+
+    // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
+    // all statements that end with those. Consume this semicolon here.
+    //
+    // (!) statements never consume 'eof', so looking at the next token is ok.
+    if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
+      return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
+    return Tokens;
   }
 
 private:
@@ -227,16 +253,168 @@
   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
     using NodeRole = syntax::NodeRole;
 
-    Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
-                           NodeRole::CompoundStatement_lbrace);
+    Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen);
+    for (auto *Child : S->body())
+      Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement);
     Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
-                           NodeRole::CompoundStatement_rbrace);
+                           NodeRole::CloseParen);
 
-    Builder.foldNode(Builder.getRange(S),
+    Builder.foldNode(Builder.getStmtRange(S),
                      new (allocator()) syntax::CompoundStatement);
     return true;
   }
 
+  // Some statements are not yet handled by syntax trees.
+  bool WalkUpFromStmt(Stmt *S) {
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::UnknownStatement);
+    return true;
+  }
+
+  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
+    // We override to traverse range initializer as VarDecl.
+    // RAV traverses it as a statement, we produce invalid node kinds in that
+    // case.
+    // FIXME: should do this in RAV instead?
+    if (S->getInit() && !TraverseStmt(S->getInit()))
+      return false;
+    if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
+      return false;
+    if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
+      return false;
+    if (S->getBody() && !TraverseStmt(S->getBody()))
+      return false;
+    return true;
+  }
+
+  bool TraverseStmt(Stmt *S) {
+    if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+      // (!) do not recurse into subexpressions.
+      // we do not have syntax trees for expressions yet, so we only want to see
+      // the first top-level expression.
+      return WalkUpFromExpr(E->IgnoreImplicit());
+    }
+    return RecursiveASTVisitor::TraverseStmt(S);
+  }
+
+  // Some expressions are not yet handled by syntax trees.
+  bool WalkUpFromExpr(Expr *E) {
+    assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
+    Builder.foldNode(Builder.getExprRange(E),
+                     new (allocator()) syntax::UnknownExpression);
+    return true;
+  }
+
+  // The code below is very regular, it could even be generated with some
+  // preprocessor magic. We merely assign roles to the corresponding children
+  // and fold resulting nodes.
+  bool WalkUpFromDeclStmt(DeclStmt *S) {
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::DeclarationStatement);
+    return true;
+  }
+
+  bool WalkUpFromNullStmt(NullStmt *S) {
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::EmptyStatement);
+    return true;
+  }
+
+  bool WalkUpFromSwitchStmt(SwitchStmt *S) {
+    Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::SwitchStatement);
+    return true;
+  }
+
+  bool WalkUpFromCaseStmt(CaseStmt *S) {
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_case,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
+    Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::CaseStatement);
+    return true;
+  }
+
+  bool WalkUpFromDefaultStmt(DefaultStmt *S) {
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_default,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::DefaultStatement);
+    return true;
+  }
+
+  bool WalkUpFromIfStmt(IfStmt *S) {
+    Builder.markChildToken(S->getIfLoc(), tok::kw_if,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getThen(),
+                          syntax::NodeRole::IfStatement_thenStatement);
+    Builder.markChildToken(S->getElseLoc(), tok::kw_else,
+                           syntax::NodeRole::IfStatement_elseKeyword);
+    Builder.markStmtChild(S->getElse(),
+                          syntax::NodeRole::IfStatement_elseStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::IfStatement);
+    return true;
+  }
+
+  bool WalkUpFromForStmt(ForStmt *S) {
+    Builder.markChildToken(S->getForLoc(), tok::kw_for,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::ForStatement);
+    return true;
+  }
+
+  bool WalkUpFromWhileStmt(WhileStmt *S) {
+    Builder.markChildToken(S->getWhileLoc(), tok::kw_while,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::WhileStatement);
+    return true;
+  }
+
+  bool WalkUpFromContinueStmt(ContinueStmt *S) {
+    Builder.markChildToken(S->getContinueLoc(), tok::kw_continue,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::ContinueStatement);
+    return true;
+  }
+
+  bool WalkUpFromBreakStmt(BreakStmt *S) {
+    Builder.markChildToken(S->getBreakLoc(), tok::kw_break,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::BreakStatement);
+    return true;
+  }
+
+  bool WalkUpFromReturnStmt(ReturnStmt *S) {
+    Builder.markChildToken(S->getReturnLoc(), tok::kw_return,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markExprChild(S->getRetValue(),
+                          syntax::NodeRole::ReturnStatement_value);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::ReturnStatement);
+    return true;
+  }
+
+  bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
+    Builder.markChildToken(S->getForLoc(), tok::kw_for,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getStmtRange(S),
+                     new (allocator()) syntax::RangeBasedForStatement);
+    return true;
+  }
+
 private:
   /// A small helper to save some typing.
   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
@@ -258,6 +436,32 @@
   Pending.assignRole(*findToken(Loc), Role);
 }
 
+void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {
+  if (!Child)
+    return;
+
+  auto Range = getStmtRange(Child);
+  // This is an expression in a statement position, consume the trailing
+  // semicolon and form an 'ExpressionStatement' node.
+  if (auto *E = dyn_cast<Expr>(Child)) {
+    Pending.assignRole(getExprRange(E),
+                       NodeRole::ExpressionStatement_expression);
+    // (!) 'getStmtRange' ensures this already covers a trailing semicolon.
+    Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
+  }
+  Pending.assignRole(Range, Role);
+}
+
+void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {
+  // Optional subexpressions are null in the AST, e.g. the value of a plain
+  // 'return;'. There is nothing to mark in that case, and computing a range
+  // would dereference the null pointer.
+  if (!Child)
+    return;
+
+  Pending.assignRole(getExprRange(Child), Role);
+}
+
 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
   auto Tokens = Arena.tokenBuffer().expandedTokens();
   auto &SM = Arena.sourceManager();
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -6,6 +6,17 @@
 //
 //===----------------------------------------------------------------------===//
 // Syntax tree nodes for C, C++ and Objective-C grammar constructs.
+//
+// Nodes provide access to their syntactic components, e.g. IfStatement provides
+// a way to get its condition, then and else branches, tokens for 'if' and
+// 'else' keywords.
+// When using the accessors, please assume they can return null. This happens
+// because:
+//   - the corresponding subnode is optional in the C++ grammar, e.g. an else
+//     branch of an if statement,
+//   - syntactic errors occurred while parsing the corresponding subnode.
+// One notable exception is "introducer" keywords, e.g. the accessor for the
+// 'if' keyword of an if statement will never return null.
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H
 #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H
@@ -17,31 +28,70 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
-
 namespace clang {
 namespace syntax {
 
-/// A kind of a syntax node, used for implementing casts.
+/// A kind of a syntax node, used for implementing casts. The ordering and
+/// blocks of enumerator constants must correspond to the inheritance hierarchy
+/// of syntax::Node.
 enum class NodeKind : uint16_t {
   Leaf,
   TranslationUnit,
   TopLevelDeclaration,
+
+  // Expressions
+  UnknownExpression,
+
+  // Statements
+  UnknownStatement,
+  DeclarationStatement,
+  EmptyStatement,
+  SwitchStatement,
+  CaseStatement,
+  DefaultStatement,
+  IfStatement,
+  ForStatement,
+  WhileStatement,
+  ContinueStatement,
+  BreakStatement,
+  ReturnStatement,
+  RangeBasedForStatement,
+  ExpressionStatement,
   CompoundStatement
 };
 /// For debugging purposes.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K);
 
-/// A relation between a parent and child node. Used for implementing accessors.
+/// A relation between a parent and child node, e.g. 'left-hand-side of
+/// a binary expression'. Used for implementing accessors.
 enum class NodeRole : uint8_t {
-  // A node without a parent.
+  // Roles common to multiple node kinds.
+  /// A node without a parent
   Detached,
-  // Children of an unknown semantic nature, e.g. skipped tokens, comments.
+  /// Children of an unknown semantic nature, e.g. skipped tokens, comments.
   Unknown,
-  // FIXME: should this be shared for all other nodes with braces, e.g. init
-  //        lists?
-  CompoundStatement_lbrace,
-  CompoundStatement_rbrace
+  /// An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc.
+  OpenParen,
+  /// A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc.
+  CloseParen,
+  /// A keyword that introduces some grammar construct, e.g. 'if', 'try', etc.
+  IntroducerKeyword,
+  /// An inner statement for those that have only a single child of kind
+  /// statement, e.g. loop body for while, for, etc; inner statement for case,
+  /// default, etc.
+  BodyStatement,
+
+  // Roles specific to particular node kinds.
+  CaseStatement_value,
+  IfStatement_thenStatement,
+  IfStatement_elseKeyword,
+  IfStatement_elseStatement,
+  ReturnStatement_value,
+  ExpressionStatement_expression,
+  CompoundStatement_statement
 };
+/// For debugging purposes.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeRole R);
 
 /// A root node for a translation unit. Parent is always null.
 class TranslationUnit final : public Tree {
@@ -66,16 +116,190 @@
   }
 };
 
+/// A base class for all expressions. Note that expressions are not statements,
+/// even though they are in clang.
+class Expression : public Tree {
+public:
+  Expression(NodeKind K) : Tree(K) {}
+  static bool classof(const Node *N) {
+    return NodeKind::UnknownExpression <= N->kind() &&
+           N->kind() <= NodeKind::UnknownExpression;
+  }
+};
+
+/// An expression of an unknown kind, i.e. one not currently handled by the
+/// syntax tree.
+class UnknownExpression final : public Expression {
+public:
+  UnknownExpression() : Expression(NodeKind::UnknownExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownExpression;
+  }
+};
+
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
+/// FIXME: add accessors for semicolon of statements that have it.
 class Statement : public Tree {
 public:
   Statement(NodeKind K) : Tree(K) {}
   static bool classof(const Node *N) {
-    return NodeKind::CompoundStatement <= N->kind() &&
+    return NodeKind::UnknownStatement <= N->kind() &&
            N->kind() <= NodeKind::CompoundStatement;
   }
 };
 
+/// A statement of an unknown kind, i.e. one not currently handled by the syntax
+/// tree.
+class UnknownStatement final : public Statement {
+public:
+  UnknownStatement() : Statement(NodeKind::UnknownStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownStatement;
+  }
+};
+
+/// E.g. 'int a, b = 10;'
+class DeclarationStatement final : public Statement {
+public:
+  DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DeclarationStatement;
+  }
+};
+
+/// The no-op statement, i.e. ';'.
+class EmptyStatement final : public Statement {
+public:
+  EmptyStatement() : Statement(NodeKind::EmptyStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::EmptyStatement;
+  }
+};
+
+/// switch (<cond>) <body>
+class SwitchStatement final : public Statement {
+public:
+  SwitchStatement() : Statement(NodeKind::SwitchStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::SwitchStatement;
+  }
+  syntax::Leaf *switchKeyword();
+  syntax::Statement *body();
+};
+
+/// case <value>: <body>
+class CaseStatement final : public Statement {
+public:
+  CaseStatement() : Statement(NodeKind::CaseStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::CaseStatement;
+  }
+  syntax::Leaf *caseKeyword();
+  syntax::Expression *value();
+  syntax::Statement *body();
+};
+
+/// default: <body>
+class DefaultStatement final : public Statement {
+public:
+  DefaultStatement() : Statement(NodeKind::DefaultStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DefaultStatement;
+  }
+  syntax::Leaf *defaultKeyword();
+  syntax::Statement *body();
+};
+
+/// if (cond) <then-statement> else <else-statement>
+/// FIXME: add condition that models 'expression or variable declaration'
+class IfStatement final : public Statement {
+public:
+  IfStatement() : Statement(NodeKind::IfStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::IfStatement;
+  }
+  syntax::Leaf *ifKeyword();
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();
+};
+
+/// for (<init>; <cond>; <increment>) <body>
+class ForStatement final : public Statement {
+public:
+  ForStatement() : Statement(NodeKind::ForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+};
+
+/// while (<cond>) <body>
+class WhileStatement final : public Statement {
+public:
+  WhileStatement() : Statement(NodeKind::WhileStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::WhileStatement;
+  }
+  syntax::Leaf *whileKeyword();
+  syntax::Statement *body();
+};
+
+/// continue;
+class ContinueStatement final : public Statement {
+public:
+  ContinueStatement() : Statement(NodeKind::ContinueStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ContinueStatement;
+  }
+  syntax::Leaf *continueKeyword();
+};
+
+/// break;
+class BreakStatement final : public Statement {
+public:
+  BreakStatement() : Statement(NodeKind::BreakStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::BreakStatement;
+  }
+  syntax::Leaf *breakKeyword();
+};
+
+/// return <expr>;
+/// return;
+class ReturnStatement final : public Statement {
+public:
+  ReturnStatement() : Statement(NodeKind::ReturnStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ReturnStatement;
+  }
+  syntax::Leaf *returnKeyword();
+  syntax::Expression *value();
+};
+
+/// for (<decl> : <init>) <body>
+class RangeBasedForStatement final : public Statement {
+public:
+  RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::RangeBasedForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+};
+
+/// Expression in a statement position, e.g. function calls inside compound
+/// statements or inside a loop body.
+class ExpressionStatement final : public Statement {
+public:
+  ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ExpressionStatement;
+  }
+  syntax::Expression *expression();
+};
+
 /// { statement1; statement2; … }
 class CompoundStatement final : public Statement {
 public:
@@ -84,6 +308,8 @@
     return N->kind() == NodeKind::CompoundStatement;
   }
   syntax::Leaf *lbrace();
+  /// FIXME: use custom iterator instead of 'vector'.
+  std::vector<syntax::Statement *> statements();
   syntax::Leaf *rbrace();
 };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D63835: [Sy... Ilya Biryukov via Phabricator via cfe-commits

Reply via email to