ilya-biryukov updated this revision to Diff 208937.
ilya-biryukov marked 5 inline comments as done.
ilya-biryukov added a comment.

- Rebase
- Address comments
- Restructure the roles
- Remove the role from tree dumps for now. With too many roles it is annoying to
update the test outputs on incremental changes. I tried using the symbolic role
names there, but they end up being too verbose.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63835/new/

https://reviews.llvm.org/D63835

Files:
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -136,19 +136,326 @@
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 |-TopLevelDeclaration
 | |-void
 | |-foo
 | |-(
 | |-)
 | `-CompoundStatement
-|   |-2: {
-|   `-3: }
+|   |-{
+|   `-}
 `-<eof>
 )txt"},
-  };
+      // if.
+      {
+          R"cpp(
+int main() {
+  if (true) {}
+  if (true) {} else if (false) {}
+}
+        )cpp",
+          R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-CompoundStatement
+|   | | |-{
+|   | | `-}
+|   | |-else
+|   | `-IfStatement
+|   |   |-if
+|   |   |-(
+|   |   |-UnknownExpression
+|   |   | `-false
+|   |   |-)
+|   |   `-CompoundStatement
+|   |     |-{
+|   |     `-}
+|   `-}
+`-<eof>
+        )txt"},
+      // for.
+      {R"cpp(
+void test() {
+  for (;;)  {}
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ForStatement
+|   | |-for
+|   | |-(
+|   | |-;
+|   | |-;
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   `-}
+|   `-}
+`-<eof>
+        )txt"},
+      // declaration statement.
+      {"void test() { int a = 10; }",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-=
+|   | |-10
+|   | `-;
+|   `-}
+`-<eof>
+)txt"},
+      {"void test() { ; }", R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-EmptyStatement
+|   | `-;
+|   `-}
+`-<eof>
+)txt"},
+      // switch, case and default.
+      {R"cpp(
+void test() {
+  switch (true) {
+    case 0:
+    default:;
+  }
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-SwitchStatement
+|   | |-switch
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-CaseStatement
+|   |   | |-case
+|   |   | |-UnknownExpression
+|   |   | | `-0
+|   |   | |-:
+|   |   | `-DefaultStatement
+|   |   |   |-default
+|   |   |   |-:
+|   |   |   `-EmptyStatement
+|   |   |     `-;
+|   |   `-}
+|   `-}
+`-<eof>
+)txt"},
+      // while.
+      {R"cpp(
+void test() {
+  while (true) { continue; break; }
+}
+)cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-WhileStatement
+|   | |-while
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | `-CompoundStatement
+|   |   |-{
+|   |   |-ContinueStatement
+|   |   | |-continue
+|   |   | `-;
+|   |   |-BreakStatement
+|   |   | |-break
+|   |   | `-;
+|   |   `-}
+|   `-}
+`-<eof>
+)txt"},
+      // return.
+      {R"cpp(
+int test() { return 1; }
+      )cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-int
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ReturnStatement
+|   | |-return
+|   | |-UnknownExpression
+|   | | `-1
+|   | `-;
+|   `-}
+`-<eof>
+       )txt"},
+      // Range-based for.
+      {R"cpp(
+void test() {
+  int a[3];
+  for (int x : a) ;
+}
+      )cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-DeclarationStatement
+|   | |-int
+|   | |-a
+|   | |-[
+|   | |-3
+|   | |-]
+|   | `-;
+|   |-RangeBasedForStatement
+|   | |-for
+|   | |-(
+|   | |-int
+|   | |-x
+|   | |-:
+|   | |-UnknownExpression
+|   | | `-a
+|   | |-)
+|   | `-EmptyStatement
+|   |   `-;
+|   `-}
+`-<eof>
+       )txt"},
+      // Unhandled statements should end up as 'unknown statement'.
+      // This example uses a 'label statement', which does not yet have a syntax
+      // counterpart.
+      {"void main() { foo: return 100; }", R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-main
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-UnknownStatement
+|   | |-foo
+|   | |-:
+|   | `-ReturnStatement
+|   |   |-return
+|   |   |-UnknownExpression
+|   |   | `-100
+|   |   `-;
+|   `-}
+`-<eof>
+)txt"},
+      // expressions should be wrapped in 'ExpressionStatement' when they appear
+      // in a statement position.
+      {R"cpp(
+void test() {
+  test();
+  if (true) test(); else test();
+}
+    )cpp",
+       R"txt(
+*: TranslationUnit
+|-TopLevelDeclaration
+| |-void
+| |-test
+| |-(
+| |-)
+| `-CompoundStatement
+|   |-{
+|   |-ExpressionStatement
+|   | |-UnknownExpression
+|   | | |-test
+|   | | |-(
+|   | | `-)
+|   | `-;
+|   |-IfStatement
+|   | |-if
+|   | |-(
+|   | |-UnknownExpression
+|   | | `-true
+|   | |-)
+|   | |-ExpressionStatement
+|   | | |-UnknownExpression
+|   | | | |-test
+|   | | | |-(
+|   | | | `-)
+|   | | `-;
+|   | |-else
+|   | `-ExpressionStatement
+|   |   |-UnknownExpression
+|   |   | |-test
+|   |   | |-(
+|   |   | `-)
+|   |   `-;
+|   `-}
+`-<eof>
+       )txt"}};
 
   for (const auto &T : Cases) {
     auto *Root = buildTree(T.first);
Index: clang/lib/Tooling/Syntax/Tree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -85,13 +85,10 @@
 
 static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N,
                      const syntax::Arena &A, std::vector<bool> IndentMask) {
-  if (N->role() != syntax::NodeRole::Unknown) {
-    // FIXME: print the symbolic name of a role.
-    if (N->role() == syntax::NodeRole::Detached)
-      OS << "*: ";
-    else
-      OS << static_cast<int>(N->role()) << ": ";
-  }
+  if (N->role() == syntax::NodeRole::Detached)
+    OS << "*: ";
+  // FIXME: find a nice way to print other roles.
+
   if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) {
     dumpTokens(OS, *L->token(), A.sourceManager());
     OS << "\n";
Index: clang/lib/Tooling/Syntax/Nodes.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Nodes.cpp
+++ clang/lib/Tooling/Syntax/Nodes.cpp
@@ -18,18 +18,199 @@
     return OS << "TranslationUnit";
   case NodeKind::TopLevelDeclaration:
     return OS << "TopLevelDeclaration";
+  case NodeKind::UnknownExpression:
+    return OS << "UnknownExpression";
+  case NodeKind::UnknownStatement:
+    return OS << "UnknownStatement";
+  case NodeKind::DeclarationStatement:
+    return OS << "DeclarationStatement";
+  case NodeKind::EmptyStatement:
+    return OS << "EmptyStatement";
+  case NodeKind::SwitchStatement:
+    return OS << "SwitchStatement";
+  case NodeKind::CaseStatement:
+    return OS << "CaseStatement";
+  case NodeKind::DefaultStatement:
+    return OS << "DefaultStatement";
+  case NodeKind::IfStatement:
+    return OS << "IfStatement";
+  case NodeKind::ForStatement:
+    return OS << "ForStatement";
+  case NodeKind::WhileStatement:
+    return OS << "WhileStatement";
+  case NodeKind::ContinueStatement:
+    return OS << "ContinueStatement";
+  case NodeKind::BreakStatement:
+    return OS << "BreakStatement";
+  case NodeKind::ReturnStatement:
+    return OS << "ReturnStatement";
+  case NodeKind::RangeBasedForStatement:
+    return OS << "RangeBasedForStatement";
+  case NodeKind::ExpressionStatement:
+    return OS << "ExpressionStatement";
   case NodeKind::CompoundStatement:
     return OS << "CompoundStatement";
   }
   llvm_unreachable("unknown node kind");
 }
 
+llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) {
+  switch (R) {
+  case syntax::NodeRole::Detached:
+    return OS << "Detached";
+  case syntax::NodeRole::Unknown:
+    return OS << "Unknown";
+  case syntax::NodeRole::OpenParen:
+    return OS << "OpenParen";
+  case syntax::NodeRole::CloseParen:
+    return OS << "CloseParen";
+  case syntax::NodeRole::IntroducerKeyword:
+    return OS << "IntroducerKeyword";
+  case syntax::NodeRole::BodyStatement:
+    return OS << "BodyStatement";
+  case syntax::NodeRole::CaseStatement_value:
+    return OS << "CaseStatement_value";
+  case syntax::NodeRole::IfStatement_thenStatement:
+    return OS << "IfStatement_thenStatement";
+  case syntax::NodeRole::IfStatement_elseKeyword:
+    return OS << "IfStatement_elseKeyword";
+  case syntax::NodeRole::IfStatement_elseStatement:
+    return OS << "IfStatement_elseStatement";
+  case syntax::NodeRole::ReturnStatement_value:
+    return OS << "ReturnStatement_value";
+  case syntax::NodeRole::ExpressionStatement_expression:
+    return OS << "ExpressionStatement_expression";
+  case syntax::NodeRole::CompoundStatement_statement:
+    return OS << "CompoundStatement_statement";
+  }
+  llvm_unreachable("invalid role");
+}
+
+syntax::Leaf *syntax::SwitchStatement::switchKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::SwitchStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::CaseStatement::caseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::CaseStatement::value() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::CaseStatement_value));
+}
+
+syntax::Statement *syntax::CaseStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::DefaultStatement::defaultKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::DefaultStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::ifKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::thenStatement() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::IfStatement_thenStatement));
+}
+
+syntax::Leaf *syntax::IfStatement::elseKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IfStatement_elseKeyword));
+}
+
+syntax::Statement *syntax::IfStatement::elseStatement() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::IfStatement_elseStatement));
+}
+
+syntax::Leaf *syntax::ForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::ForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::WhileStatement::whileKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::WhileStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Leaf *syntax::ContinueStatement::continueKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::BreakStatement::breakKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Leaf *syntax::ReturnStatement::returnKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Expression *syntax::ReturnStatement::value() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::ReturnStatement_value));
+}
+
+syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::IntroducerKeyword));
+}
+
+syntax::Statement *syntax::RangeBasedForStatement::body() {
+  return llvm::cast_or_null<syntax::Statement>(
+      findChild(syntax::NodeRole::BodyStatement));
+}
+
+syntax::Expression *syntax::ExpressionStatement::expression() {
+  return llvm::cast_or_null<syntax::Expression>(
+      findChild(syntax::NodeRole::ExpressionStatement_expression));
+}
+
 syntax::Leaf *syntax::CompoundStatement::lbrace() {
   return llvm::cast_or_null<syntax::Leaf>(
-      findChild(NodeRole::CompoundStatement_lbrace));
+      findChild(syntax::NodeRole::OpenParen));
+}
+
+std::vector<syntax::Statement *> syntax::CompoundStatement::statements() {
+  std::vector<syntax::Statement *> Children;
+  for (auto *C = firstChild(); C; C = C->nextSibling()) {
+    if (C->role() == syntax::NodeRole::CompoundStatement_statement)
+      Children.push_back(llvm::cast<syntax::Statement>(C));
+  }
+  return Children;
 }
 
 syntax::Leaf *syntax::CompoundStatement::rbrace() {
   return llvm::cast_or_null<syntax::Leaf>(
-      findChild(NodeRole::CompoundStatement_rbrace));
+      findChild(syntax::NodeRole::CloseParen));
 }
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -27,6 +27,8 @@
 
 using namespace clang;
 
+static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; }
+
 /// A helper class for constructing the syntax tree while traversing a clang
 /// AST.
 ///
@@ -52,6 +54,15 @@
   /// Range.
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
 
+  /// Mark the \p Child node with a corresponding \p Role. All marked children
+  /// should be consumed by foldNode.
+  /// (!) this overload should only be called for expressions in a statement
+  /// position, it will wrap expressions into expression statement.
+  void markChild(Stmt *Child, NodeRole Role);
+  /// It is important to call this overload for expressions in non-statement
+  /// position to avoid wrapping into expression statement.
+  void markChild(Expr *Child, NodeRole Role);
+
   /// Set role for a token starting at \p Loc.
   void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
 
@@ -80,8 +91,23 @@
   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
     return getRange(D->getBeginLoc(), D->getEndLoc());
   }
+  llvm::ArrayRef<syntax::Token> getRange(const Expr *E) const {
+    return getRange(E->getBeginLoc(), E->getEndLoc());
+  }
+  /// Find the adjusted range for the statement, consuming the trailing
+  /// semicolon when needed.
   llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
-    return getRange(S->getBeginLoc(), S->getEndLoc());
+    auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc());
+    if (isa<CompoundStmt>(S))
+      return Tokens;
+
+    // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
+    // all statements that end with those. Consume this semicolon here.
+    //
+    // (!) statements never consume 'eof', so looking at the next token is ok.
+    if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
+      return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
+    return Tokens;
   }
 
 private:
@@ -223,16 +249,168 @@
   bool WalkUpFromCompoundStmt(CompoundStmt *S) {
     using NodeRole = syntax::NodeRole;
 
-    Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
-                           NodeRole::CompoundStatement_lbrace);
+    Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen);
+    for (auto *Child : S->body())
+      Builder.markChild(Child, NodeRole::CompoundStatement_statement);
     Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
-                           NodeRole::CompoundStatement_rbrace);
+                           NodeRole::CloseParen);
 
     Builder.foldNode(Builder.getRange(S),
                      new (allocator()) syntax::CompoundStatement);
     return true;
   }
 
+  // Some statements are not yet handled by syntax trees.
+  bool WalkUpFromStmt(Stmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::UnknownStatement);
+    return true;
+  }
+
+  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
+    // We override to traverse range initializer as VarDecl.
+    // RecursiveASTVisitor traverses it as a statement, we produce invalid node
+    // kinds in that case.
+    // FIXME: should do this in RecursiveASTVisitor instead?
+    if (S->getInit() && !TraverseStmt(S->getInit()))
+      return false;
+    if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
+      return false;
+    if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
+      return false;
+    if (S->getBody() && !TraverseStmt(S->getBody()))
+      return false;
+    return true;
+  }
+
+  // Some expressions are not yet handled by syntax trees.
+  bool WalkUpFromExpr(Expr *E) {
+    assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
+    Builder.foldNode(Builder.getRange(E),
+                     new (allocator()) syntax::UnknownExpression);
+    return true;
+  }
+
+  bool TraverseStmt(Stmt *S) {
+    if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+      // (!) do not recurse into subexpressions.
+      // we do not have syntax trees for expressions yet, so we only want to see
+      // the first top-level expression.
+      return WalkUpFromExpr(E->IgnoreImplicit());
+    }
+    return RecursiveASTVisitor::TraverseStmt(S);
+  }
+
+  // The code below is very regular, it could even be generated with some
+  // preprocessor magic. We merely assign roles to the corresponding children
+  // and fold resulting nodes.
+  bool WalkUpFromDeclStmt(DeclStmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::DeclarationStatement);
+    return true;
+  }
+
+  bool WalkUpFromNullStmt(NullStmt *S) {
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::EmptyStatement);
+    return true;
+  }
+
+  bool WalkUpFromSwitchStmt(SwitchStmt *S) {
+    Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::SwitchStatement);
+    return true;
+  }
+
+  bool WalkUpFromCaseStmt(CaseStmt *S) {
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_case,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getLHS(), syntax::NodeRole::CaseStatement_value);
+    Builder.markChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::CaseStatement);
+    return true;
+  }
+
+  bool WalkUpFromDefaultStmt(DefaultStmt *S) {
+    Builder.markChildToken(S->getKeywordLoc(), tok::kw_default,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::DefaultStatement);
+    return true;
+  }
+
+  bool WalkUpFromIfStmt(IfStmt *S) {
+    Builder.markChildToken(S->getIfLoc(), tok::kw_if,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getThen(),
+                      syntax::NodeRole::IfStatement_thenStatement);
+    Builder.markChildToken(S->getElseLoc(), tok::kw_else,
+                           syntax::NodeRole::IfStatement_elseKeyword);
+    Builder.markChild(S->getElse(),
+                      syntax::NodeRole::IfStatement_elseStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::IfStatement);
+    return true;
+  }
+
+  bool WalkUpFromForStmt(ForStmt *S) {
+    Builder.markChildToken(S->getForLoc(), tok::kw_for,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ForStatement);
+    return true;
+  }
+
+  bool WalkUpFromWhileStmt(WhileStmt *S) {
+    Builder.markChildToken(S->getWhileLoc(), tok::kw_while,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::WhileStatement);
+    return true;
+  }
+
+  bool WalkUpFromContinueStmt(ContinueStmt *S) {
+    Builder.markChildToken(S->getContinueLoc(), tok::kw_continue,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ContinueStatement);
+    return true;
+  }
+
+  bool WalkUpFromBreakStmt(BreakStmt *S) {
+    Builder.markChildToken(S->getBreakLoc(), tok::kw_break,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::BreakStatement);
+    return true;
+  }
+
+  bool WalkUpFromReturnStmt(ReturnStmt *S) {
+    Builder.markChildToken(S->getReturnLoc(), tok::kw_return,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getRetValue(),
+                      syntax::NodeRole::ReturnStatement_value);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::ReturnStatement);
+    return true;
+  }
+
+  bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
+    Builder.markChildToken(S->getForLoc(), tok::kw_for,
+                           syntax::NodeRole::IntroducerKeyword);
+    Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement);
+    Builder.foldNode(Builder.getRange(S),
+                     new (allocator()) syntax::RangeBasedForStatement);
+    return true;
+  }
+
 private:
   /// A small helper to save some typing.
   llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
@@ -254,6 +432,25 @@
   Pending.assignRole(*findToken(Loc), Role);
 }
 
+void syntax::TreeBuilder::markChild(Stmt *Child, NodeRole Role) {
+  if (!Child)
+    return;
+
+  auto Range = getRange(Child);
+  // This is an expression in a statement position, consume the trailing
+  // semicolon and form an 'ExpressionStatement' node.
+  if (auto *E = dyn_cast<Expr>(Child)) {
+    Pending.assignRole(getRange(E), NodeRole::ExpressionStatement_expression);
+    // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
+    Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
+  }
+  Pending.assignRole(Range, Role);
+}
+
+/// Assign \p Role to the range covered by the expression \p Child, without
+/// wrapping it into an expression statement.
+/// A null \p Child is ignored, mirroring the 'Stmt *' overload: callers pass
+/// optional sub-expressions (e.g. the value of a return statement) without
+/// checking them first, and computing a range for a null expression would
+/// dereference it.
+void syntax::TreeBuilder::markChild(Expr *Child, NodeRole Role) {
+  if (!Child)
+    return;
+
+  Pending.assignRole(getRange(Child), Role);
+}
+
 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
   auto Tokens = Arena.tokenBuffer().expandedTokens();
   auto &SM = Arena.sourceManager();
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -26,22 +26,56 @@
   Leaf,
   TranslationUnit,
   TopLevelDeclaration,
+  UnknownExpression,
+  UnknownStatement,
+  DeclarationStatement,
+  EmptyStatement,
+  SwitchStatement,
+  CaseStatement,
+  DefaultStatement,
+  IfStatement,
+  ForStatement,
+  WhileStatement,
+  ContinueStatement,
+  BreakStatement,
+  ReturnStatement,
+  RangeBasedForStatement,
+  ExpressionStatement,
   CompoundStatement
 };
 /// For debugging purposes.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K);
 
-/// A relation between a parent and child node. Used for implementing accessors.
+/// A relation between a parent and child node, e.g. 'left-hand-side of a binary
+/// expression'. Used for implementing accessors.
 enum class NodeRole : uint8_t {
-  // A node without a parent.
+  // Roles common to multiple node kinds.
+  /// A node without a parent.
   Detached,
-  // Children of an unknown semantic nature, e.g. skipped tokens, comments.
+  /// Children of an unknown semantic nature, e.g. skipped tokens, comments.
   Unknown,
-  // FIXME: should this be shared for all other nodes with braces, e.g. init
-  //        lists?
-  CompoundStatement_lbrace,
-  CompoundStatement_rbrace
+  /// An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc.
+  OpenParen,
+  /// A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc.
+  CloseParen,
+  /// A keyword that introduces some grammar construct, e.g. 'if', 'try', etc.
+  IntroducerKeyword,
+  /// An inner statement for those that have only a single child of kind
+  /// statement, e.g. loop body for while, for, etc; inner statement for case,
+  /// default, etc.
+  BodyStatement,
+
+  // Roles specific to particular node kinds.
+  CaseStatement_value,
+  IfStatement_thenStatement,
+  IfStatement_elseKeyword,
+  IfStatement_elseStatement,
+  ReturnStatement_value,
+  ExpressionStatement_expression,
+  CompoundStatement_statement
 };
+/// For debugging purposes.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeRole R);
 
 /// A root node for a translation unit. Parent is always null.
 class TranslationUnit final : public Tree {
@@ -66,16 +100,189 @@
   }
 };
 
+/// A base class for all expressions. Note that expressions are not statements,
+/// even though they are in clang.
+class Expression : public Tree {
+public:
+  Expression(NodeKind K) : Tree(K) {}
+  static bool classof(const Node *N) {
+    return NodeKind::UnknownExpression <= N->kind() &&
+           N->kind() <= NodeKind::UnknownExpression;
+  }
+};
+
+/// An expression of an unknown kind, i.e. one not currently handled by the
+/// syntax tree.
+class UnknownExpression final : public Expression {
+public:
+  UnknownExpression() : Expression(NodeKind::UnknownExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownExpression;
+  }
+};
+
 /// An abstract node for C++ statements, e.g. 'while', 'if', etc.
+/// FIXME: add accessors for semicolon of statements that have it.
 class Statement : public Tree {
 public:
   Statement(NodeKind K) : Tree(K) {}
   static bool classof(const Node *N) {
-    return NodeKind::CompoundStatement <= N->kind() &&
+    return NodeKind::UnknownStatement <= N->kind() &&
            N->kind() <= NodeKind::CompoundStatement;
   }
 };
 
+/// A statement of an unknown kind, i.e. one not currently handled by the syntax
+/// tree.
+class UnknownStatement final : public Statement {
+public:
+  UnknownStatement() : Statement(NodeKind::UnknownStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownStatement;
+  }
+};
+
+/// E.g. 'int a, b = 10;'
+class DeclarationStatement final : public Statement {
+public:
+  DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DeclarationStatement;
+  }
+};
+
+/// The no-op statement, i.e. ';'.
+class EmptyStatement final : public Statement {
+public:
+  EmptyStatement() : Statement(NodeKind::EmptyStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::EmptyStatement;
+  }
+};
+
+/// switch (<cond>) <body>
+class SwitchStatement final : public Statement {
+public:
+  SwitchStatement() : Statement(NodeKind::SwitchStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::SwitchStatement;
+  }
+  syntax::Leaf *switchKeyword();
+  syntax::Statement *body();
+};
+
+/// case <value>: <body>
+class CaseStatement final : public Statement {
+public:
+  CaseStatement() : Statement(NodeKind::CaseStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::CaseStatement;
+  }
+  syntax::Leaf *caseKeyword();
+  syntax::Expression *value();
+  syntax::Statement *body();
+};
+
+/// default: <body>
+class DefaultStatement final : public Statement {
+public:
+  DefaultStatement() : Statement(NodeKind::DefaultStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::DefaultStatement;
+  }
+  syntax::Leaf *defaultKeyword();
+  syntax::Statement *body();
+};
+
+/// if (cond) <then-statement> else <else-statement>
+/// FIXME: add condition that models 'expression or variable declaration'.
+class IfStatement final : public Statement {
+public:
+  IfStatement() : Statement(NodeKind::IfStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::IfStatement;
+  }
+  syntax::Leaf *ifKeyword();
+  syntax::Statement *thenStatement();
+  syntax::Leaf *elseKeyword();
+  syntax::Statement *elseStatement();
+};
+
+/// for (<init>; <cond>; <increment>) <body>
+class ForStatement final : public Statement {
+public:
+  ForStatement() : Statement(NodeKind::ForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+};
+
+/// while (<cond>) <body>
+class WhileStatement final : public Statement {
+public:
+  WhileStatement() : Statement(NodeKind::WhileStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::WhileStatement;
+  }
+  syntax::Leaf *whileKeyword();
+  syntax::Statement *body();
+};
+
+/// continue;
+class ContinueStatement final : public Statement {
+public:
+  ContinueStatement() : Statement(NodeKind::ContinueStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ContinueStatement;
+  }
+  syntax::Leaf *continueKeyword();
+};
+
+/// break;
+class BreakStatement final : public Statement {
+public:
+  BreakStatement() : Statement(NodeKind::BreakStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::BreakStatement;
+  }
+  syntax::Leaf *breakKeyword();
+};
+
+/// return <expr>;
+class ReturnStatement final : public Statement {
+public:
+  ReturnStatement() : Statement(NodeKind::ReturnStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ReturnStatement;
+  }
+  syntax::Leaf *returnKeyword();
+  syntax::Expression *value();
+};
+
+/// for (<decl> : <init>) <body>
+class RangeBasedForStatement final : public Statement {
+public:
+  RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::RangeBasedForStatement;
+  }
+  syntax::Leaf *forKeyword();
+  syntax::Statement *body();
+};
+
+/// Expression in a statement position, e.g. function calls inside compound
+/// statements or inside a loop body.
+class ExpressionStatement final : public Statement {
+public:
+  ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::ExpressionStatement;
+  }
+  syntax::Expression *expression();
+};
+
 /// { statement1; statement2; … }
 class CompoundStatement final : public Statement {
 public:
@@ -84,6 +291,8 @@
     return N->kind() == NodeKind::CompoundStatement;
   }
   syntax::Leaf *lbrace();
+  /// FIXME: use custom iterator instead of 'vector'.
+  std::vector<syntax::Statement *> statements();
   syntax::Leaf *rbrace();
 };
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to