hokein updated this revision to Diff 407862.
hokein marked 10 inline comments as done.
hokein added a comment.
Herald added a subscriber: mgrang.

- rebase, rescope the patch to LRTable
- refine the Action class interfaces, and nameing;
- use a more compact Index, reduce LRTable from 660KB => 335KB;
- address review comments;


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118196/new/

https://reviews.llvm.org/D118196

Files:
  clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
  clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h
  clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt
  clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp
  clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
  clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp
  clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp
  clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt
  clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp
  clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp

Index: clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp
@@ -0,0 +1,57 @@
+//===--- LRTableTest.cpp - ---------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Pseudo/LRTable.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+namespace clang {
+namespace syntax {
+namespace pseudo {
+namespace {
+
+using testing::IsEmpty;
+using testing::UnorderedElementsAre;
+using Action = LRTable::Action;
+
+TEST(LRTable, Builder) {
+  LRTable::Builder Build;
+  GrammarTable GTable;
+  initTerminals(GTable.Terminals);
+
+  //           eof   semi  ...
+  // +-------+----+-------+---
+  // |state0 |    | s0,r0 |...
+  // |state1 | acc|       |...
+  // |state2 |    |  r1   |...
+  // +-------+----+-------+---
+  Build.insert(/* State */ 0, tokenSymbol(tok::semi), Action::shift(0));
+  Build.insert(/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0));
+  Build.insert(/* State */ 1, tokenSymbol(tok::eof), Action::accept());
+  Build.insert(/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1));
+
+  LRTable T = std::move(Build).build(GTable);
+  EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty());
+  EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),
+              UnorderedElementsAre(Action::shift(0), Action::reduce(0)));
+  EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),
+              UnorderedElementsAre(Action::accept()));
+  EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());
+  EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
+              UnorderedElementsAre(Action::reduce(1)));
+  // Verify the behaivor for other non-available-actions terminals.
+  EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty());
+}
+
+} // namespace
+} // namespace pseudo
+} // namespace syntax
+} // namespace clang
Index: clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp
+++ clang/unittests/Tooling/Syntax/Pseudo/LRGraphTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Syntax/Pseudo/LRGraph.h"
+#include "clang/Tooling/Syntax/Pseudo/LRTable.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <memory>
@@ -20,14 +21,16 @@
   struct TestCase {
     llvm::StringRef BNF;
     llvm::StringRef ExpectedStates;
+    llvm::StringRef ExpectedTables;
   };
 
-  TestCase Cases[] = {{
-                          R"bnf(
+  TestCase Cases[] = {
+      {
+          R"bnf(
 _ := expr
 expr := IDENTIFIER
       )bnf",
-                          R"(States:
+          R"(States:
 State 0
     _ :=  • expr
     expr :=  • IDENTIFIER
@@ -37,15 +40,25 @@
     expr := IDENTIFIER • 
 0 ->[expr] 1
 0 ->[IDENTIFIER] 2
-)"},
-                      {// A grammar with a S/R conflict in SLR table:
-                       // (id-id)-id, or id-(id-id).
-                       R"bnf(
+)",
+          R"(LRTable:
+State 0
+    'IDENTIFIER': shift state 2
+    'expr': go to state 1
+State 1
+    'EOF': accept
+State 2
+    'EOF': reduce by rule 1 'expr := IDENTIFIER'
+)",
+      },
+      {// A grammar with a S/R conflict in SLR table:
+       // (id-id)-id, or id-(id-id).
+       R"bnf(
 _ := expr
 expr := expr - expr  # S/R conflict at state 4 on '-' token
 expr := IDENTIFIER
-      )bnf",
-                       R"(States:
+     )bnf",
+       R"(States:
 State 0
     _ :=  • expr
     expr :=  • expr - expr
@@ -68,13 +81,33 @@
 3 ->[expr] 4
 3 ->[IDENTIFIER] 2
 4 ->[-] 3
-)"}};
+)",
+       R"(LRTable:
+State 0
+    'IDENTIFIER': shift state 2
+    'expr': go to state 1
+State 1
+    'EOF': accept
+    '-': shift state 3
+State 2
+    'EOF': reduce by rule 1 'expr := IDENTIFIER'
+    '-': reduce by rule 1 'expr := IDENTIFIER'
+State 3
+    'IDENTIFIER': shift state 2
+    'expr': go to state 4
+State 4
+    'EOF': reduce by rule 2 'expr := expr - expr'
+    '-': shift state 3
+    '-': reduce by rule 2 'expr := expr - expr'
+)"},
+  };
   for (const auto &C : Cases) {
     std::vector<std::string> Diags;
     auto G = Grammar::parseBNF(C.BNF, Diags);
     ASSERT_THAT(Diags, testing::IsEmpty());
     auto LR0 = LRGraph::buildLR0(*G);
     EXPECT_EQ(LR0.dumpForTests(*G), C.ExpectedStates);
+    EXPECT_EQ(LRTable::buildSLR(*G).dumpForTests(*G), C.ExpectedTables);
   }
 }
 
Index: clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt
===================================================================
--- clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt
+++ clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_clang_unittest(ClangPseudoTests
   GrammarTest.cpp
   LRGraphTest.cpp
+  LRTableTest.cpp
 )
 
 clang_target_link_libraries(ClangPseudoTests
Index: clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp
@@ -0,0 +1,49 @@
+//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
+#include "clang/Tooling/Syntax/Pseudo/LRGraph.h"
+#include "clang/Tooling/Syntax/Pseudo/LRTable.h"
+
+namespace clang {
+namespace syntax {
+namespace pseudo {
+
+LRTable LRTable::buildSLR(const Grammar &G) {
+  Builder Build;
+  auto Graph = LRGraph::buildLR0(G);
+  for (const auto &T : Graph.edges()) {
+    Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
+    Build.insert(T.Src, T.Label, Act);
+  }
+
+  auto FollowSets = followSets(G);
+  for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
+    for (const Item &I : Graph.states()[SID].Items) {
+      // If we've just parsed the start symbol, we can accept the input.
+      if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) {
+        Build.insert(SID, tokenSymbol(tok::eof), Action::accept());
+        continue;
+      }
+      if (!I.hasNext()) {
+        // If we've reached the end of a rule A := ..., then we can reduce if
+        // the next token is in the follow set of A".
+        for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {
+          assert(isToken(Follow));
+          Build.insert(SID, Follow, Action::reduce(I.rule()));
+        }
+      }
+    }
+  }
+  return std::move(Build).build(G.table());
+}
+
+} // namespace pseudo
+} // namespace syntax
+} // namespace clang
Index: clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp
@@ -0,0 +1,197 @@
+//===--- LRTable.cpp - Parsing table for LR parsers --------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Pseudo/LRTable.h"
+#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <tuple>
+#include <utility>
+
+namespace clang {
+namespace syntax {
+namespace pseudo {
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
+  switch (A.kind()) {
+  case LRTable::Action::Error:
+    return OS << "err";
+  case LRTable::Action::Shift:
+    return OS << llvm::formatv("shift state {0}", A.getShiftState());
+  case LRTable::Action::Reduce:
+    return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
+  case LRTable::Action::GoTo:
+    return OS << llvm::formatv("goTo state {0}", A.getGoToState());
+  case LRTable::Action::Accept:
+    return OS << "acc";
+  }
+  assert(false);
+}
+
+std::string LRTable::dumpStatistics() const {
+  StateID NumOfStates = 0;
+  for (auto It : StateIdx)
+    NumOfStates = std::max(It, NumOfStates);
+  return llvm::formatv(R"(
+Statistics of the LR parsing table:
+    number of states: {0}
+    number of actions: {1}
+    size of the table (bytes): {2}
+)",
+                       NumOfStates, Actions.size(), bytes())
+      .str();
+}
+
+std::string LRTable::dumpForTests(const Grammar &G) const {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  StateID MaxState = 0;
+  for (const auto &It : StateIdx)
+    MaxState = std::max(MaxState, It);
+  OS << "LRTable:\n";
+  for (StateID S = 0; S <= MaxState; ++S) {
+    OS << llvm::formatv("State {0}\n", S);
+    for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) {
+      SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
+      for (auto A : find(S, TokID)) {
+        if (A.kind() == LRTable::Action::Shift)
+          OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
+                                        G.symbolName(TokID), A.getShiftState());
+        else if (A.kind() == LRTable::Action::Reduce)
+          OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
+                                        G.symbolName(TokID), A.getReduceRule(),
+                                        G.dumpRule(A.getReduceRule()));
+        else if (A.kind() == LRTable::Action::Accept)
+          OS.indent(4) << llvm::formatv("'{0}': accept\n", G.symbolName(TokID));
+      }
+    }
+    for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
+         ++NontermID) {
+      if (find(S, NontermID).empty())
+        continue;
+      OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
+                                    G.symbolName(NontermID),
+                                    getGoToState(S, NontermID));
+    }
+  }
+  return OS.str();
+}
+
+llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State,
+                                                    SymbolID Terminal) const {
+  assert(pseudo::isToken(Terminal));
+  return find(State, Terminal);
+}
+
+LRTable::StateID LRTable::getGoToState(StateID State,
+                                       SymbolID Nonterminal) const {
+  assert(pseudo::isNonterminal(Nonterminal));
+  auto Result = find(State, Nonterminal);
+  assert(Result.size() == 1 && Result.front().kind() == Action::GoTo);
+  return Result.front().getGoToState();
+}
+
+llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const {
+  size_t Idx = isToken(ID) ? symbolToToken(ID) : ID;
+  assert(isToken(ID) ? Idx + 1 < TerminalIdx.size()
+                     : Idx + 1 < NontermIdx.size());
+  std::pair<size_t, size_t> TargetStateRange =
+      isToken(ID) ? std::make_pair(TerminalIdx[Idx], TerminalIdx[Idx + 1])
+                  : std::make_pair(NontermIdx[Idx], NontermIdx[Idx + 1]);
+  auto TargetedStates =
+      llvm::makeArrayRef(StateIdx.data() + TargetStateRange.first,
+                         StateIdx.data() + TargetStateRange.second);
+
+  assert(llvm::is_sorted(TargetedStates) &&
+         "subrange of the StateIdx should be sorted!");
+  const LRTable::StateID *It = llvm::partition_point(
+      TargetedStates, [&Src](LRTable::StateID S) { return S < Src; });
+  if (It == TargetedStates.end())
+    return {};
+  size_t Start = It - StateIdx.data(), End = Start;
+  while (End < StateIdx.size() && StateIdx[End] == Src)
+    ++End;
+  return llvm::makeArrayRef(&Actions[Start], &Actions[End]);
+}
+
+bool LRTable::Builder::insert(StateID Src, SymbolID Label,
+                              LRTable::Action New) {
+  return Entries.insert({Src, Label, New}).second;
+}
+
+LRTable LRTable::Builder::build(const GrammarTable &GT) && {
+  // E.g. given the following parsing table with 3 states and 3 terminals:
+  //
+  //            a    b     c
+  // +-------+----+-------+-+
+  // |state0 |    | s0,r0 | |
+  // |state1 | acc|       | |
+  // |state2 |    |  r1   | |
+  // +-------+----+-------+-+
+  //
+  // The final LRTable:
+  //  - TerminalIdx: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel)
+  //  - StateIdx:    [ 1,    0,  0,  2]
+  //    Actions:     [ acc, s0, r0, r1]
+  //                   ~~~ corresponding range for terminal a
+  //                        ~~~~~~~~~~ corresponding range for terminal b
+  // First step, we sort all entries by (Symbol, State, Action).
+  std::vector<Entry> Sorted(Entries.begin(), Entries.end());
+  llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
+    return std::forward_as_tuple(L.Symbol, L.State, L.Act.value()) <
+           std::forward_as_tuple(R.Symbol, R.State, R.Act.value());
+  });
+
+  LRTable Table;
+  Table.Actions.reserve(Sorted.size());
+  Table.StateIdx.reserve(Sorted.size());
+  // We are good to finalize the StateIdx and Actions.
+  for (const auto &E : Sorted) {
+    Table.Actions.push_back(E.Act);
+    Table.StateIdx.push_back(E.State);
+  }
+  // Initialize the terminal and nonterminal idx, all ranges are empty by
+  // default.
+  Table.TerminalIdx.resize(GT.Terminals.size() + 1, 0);
+  Table.NontermIdx.resize(GT.Nonterminals.size() + 1, 0);
+  llvm::ArrayRef<Entry> SortedEntries = Sorted;
+  while (!SortedEntries.empty()) {
+    // We iterate each symbol per time (conceptually a column in the matrix per
+    // time), calculate the number of the available actions, and set it to
+    // SymbolIdx.
+    const Entry &Front = SortedEntries.front();
+    // llvm::MutableArrayRef<uint32_t> SymbolIdx =
+    //     isToken(Front.Symbol) ? Table.TerminalIdx : Table.NontermIdx;
+    llvm::MutableArrayRef<uint32_t> SymbolIdx = Table.NontermIdx;
+    if (isToken(Front.Symbol))
+      SymbolIdx = Table.TerminalIdx;
+    unsigned Index =
+        isToken(Front.Symbol) ? symbolToToken(Front.Symbol) : Front.Symbol;
+    SymbolIdx[Index] = &Front - &Sorted.front();
+    auto Batch = SortedEntries.take_while(
+        [Target = Front.Symbol](const Entry &E) { return E.Symbol == Target; });
+    assert(!Batch.empty());
+    SymbolIdx[Index + 1] = SymbolIdx[Index] + Batch.size();
+    SortedEntries = SortedEntries.drop_front(Batch.size());
+  }
+  // Final step to fill some unpropagated holes in the SymbolIdx (d for the
+  // above symbol).
+  for (size_t I = 1; I < Table.TerminalIdx.size(); ++I)
+    Table.TerminalIdx[I] =
+        std::max(Table.TerminalIdx[I], Table.TerminalIdx[I - 1]);
+  for (size_t I = 1; I < Table.NontermIdx.size(); ++I)
+    Table.NontermIdx[I] =
+        std::max(Table.NontermIdx[I], Table.NontermIdx[I - 1]);
+  return Table;
+}
+
+} // namespace pseudo
+} // namespace syntax
+} // namespace clang
Index: clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
+++ clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
@@ -21,17 +21,6 @@
 static const llvm::StringRef OptSuffix = "_opt";
 static const llvm::StringRef StartSymbol = "_";
 
-void initTerminals(std::vector<std::string> &Out) {
-  Out.clear();
-  Out.reserve(NumTerminals);
-  for (unsigned I = 0; I < NumTerminals; ++I) {
-    tok::TokenKind K = static_cast<tok::TokenKind>(I);
-    if (const auto *Punc = tok::getPunctuatorSpelling(K))
-      Out.push_back(Punc);
-    else
-      Out.push_back(llvm::StringRef(tok::getTokenName(K)).upper());
-  }
-}
 // Builds grammar from BNF files.
 class GrammarBuilder {
 public:
Index: clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp
+++ clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp
@@ -163,6 +163,17 @@
   return FollowSets;
 }
 
+void initTerminals(std::vector<std::string> &Terminals) {
+  Terminals.clear();
+  Terminals.reserve(NumTerminals);
+  for (unsigned I = 0; I < NumTerminals; ++I) {
+    tok::TokenKind K = static_cast<tok::TokenKind>(I);
+    if (const auto *Punc = tok::getPunctuatorSpelling(K))
+      Terminals.push_back(Punc);
+    else
+      Terminals.push_back(llvm::StringRef(tok::getTokenName(K)).upper());
+  }
+}
 } // namespace pseudo
 } // namespace syntax
 } // namespace clang
Index: clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt
+++ clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt
@@ -4,7 +4,9 @@
   Grammar.cpp
   GrammarBNF.cpp
   LRGraph.cpp
-  
+  LRTable.cpp
+  LRTableBuild.cpp
+
   LINK_LIBS
   clangBasic
   clangLex
Index: clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h
@@ -0,0 +1,199 @@
+//===--- LRTable.h - Define LR Parsing Table ---------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  The LRTable (referred as LR parsing table in the LR literature) is the core
+//  component in LR parsers, it drives the LR parsers by specifying an action to
+//  take given the current state on the top of the stack and the current
+//  lookahead token.
+//
+//  The LRTable can be described as a matrix where the rows represent
+//  the states of the LR graph, the columns represent the symbols of the
+//  grammar, and each entry of the matrix (called action) represents a
+//  state transition in the graph.
+//
+//  Typically, based on the category of the grammar symbol, the LRTable is
+//  broken into two logically separate tables:
+//    - ACTION table with terminals as columns -- e.g ACTION[S, a] specifies
+//      next action (shift/reduce/accept/error) on state S under a lookahead
+//      terminal a
+//    - GOTO table with nonterminals as columns -- e.g. GOTO[S, X] specify
+//      the state which we transist to from the state S with the nonterminal X
+//
+//  LRTable is *performance-critial* as it is consulted frequently during a
+//  parse. In general, LRTable is very sprase (most of the entries are empty).
+//  For example, for the C++ language, the SLR table has ~1500 states and 650
+//  symbols which results in a matrix having 975K entries, ~90% of entries are
+//  empty.
+//
+//  This file implements a speed-and-space-efficient LRTable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H
+
+#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include <cstdint>
+#include <vector>
+
+namespace clang {
+namespace syntax {
+namespace pseudo {
+
+// Represents the LR parsing table, which can efficiently the question "what is
+// the next step given the lookahead token and current state on top of the
+// stack?".
+//
+// This is a dense implementation, which only takes an amount of space that is
+// proportional to the number of non-empty entries in the table.
+//
+// Unlike A typical LR parsing table allows at most one available action per
+// entry, conflicted actions are allowed in LRTable.
+class LRTable {
+public:
+  using StateID = uint16_t;
+  // Action represents the terminal and nonterminal actions, it combines the
+  // entry of the ACTION and GOTO tables from the LR literature.
+  class Action {
+  public:
+    enum Kind : uint8_t {
+      // Terminal actions, corresponding to entries of ACTION table.
+      Error = 0,
+      // Shift to state n: move forward with the lookahead, and push state n
+      // onto the state stack.
+      // A shift is a forward transition, and the value n is the next state that
+      // the parser is to enter.
+      Shift,
+      // Reduce by a rule, the value is a ruleID.
+      Reduce,
+      // Signals that we have parsd the input successfully.
+      Accept,
+
+      // Nonterminal actions, corresponding to entry of GOTO table.
+      // Go to state n: pust state n onto the state stack.
+      // Similar to Shift, but it is a nonterminal forward transition.
+      GoTo,
+    };
+
+    static Action accept() { return Action(Accept, 0); }
+    static Action goTo(StateID S) { return Action(GoTo, S); }
+    static Action shift(StateID S) { return Action(Shift, S); }
+    static Action reduce(RuleID RID) { return Action(Reduce, RID); }
+    static Action sentinel() { return Action(Error, 0); }
+
+    StateID getShiftState() const {
+      assert(kind() == Shift);
+      return Value;
+    }
+    StateID getGoToState() const {
+      assert(kind() == GoTo);
+      return Value;
+    }
+    RuleID getReduceRule() const {
+      assert(kind() == Reduce);
+      return Value;
+    }
+    Kind kind() const { return K; }
+
+    bool operator==(const Action &L) const { return value() == L.value(); }
+    uint16_t value() const { return K << ValueBits | Value; };
+
+  private:
+    Action(Kind K1, uint16_t Value) : K(K1), Value(Value) {}
+    static constexpr int ValueBits = 13;
+    static constexpr int KindBits = 3;
+    Kind K : KindBits;
+    // Either StateID or RuleID, depending on the Kind.
+    uint16_t Value : ValueBits;
+  };
+
+  // Returns all available actions for the given state on a terminal.
+  llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
+  // Returns the state after we reduce a nonterminal.
+  StateID getGoToState(StateID State, SymbolID Nonterminal) const;
+
+  // Looks up available actions.
+  // Returns empty if no available actions in the table.
+  llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
+
+  size_t bytes() const {
+    return sizeof(*this) + Actions.capacity() * sizeof(Action) +
+           StateIdx.capacity() * sizeof(StateID) +
+           NontermIdx.capacity() * sizeof(uint32_t) +
+           TerminalIdx.capacity() * sizeof(uint32_t);
+  }
+
+  std::string dumpStatistics() const;
+  std::string dumpForTests(const Grammar &G) const;
+
+  class Builder {
+  public:
+    bool insert(StateID State, SymbolID Symbol, LRTable::Action A);
+    LRTable build(const GrammarTable &) &&;
+
+  private:
+    struct Entry {
+      StateID State;
+      SymbolID Symbol;
+      Action Act;
+    };
+    friend struct llvm::DenseMapInfo<Entry>;
+    llvm::DenseSet<Entry> Entries;
+  };
+  // Build a SLR(1) parsing table.
+  static LRTable buildSLR(const Grammar &G);
+
+private:
+  // Index is nonterminal SymbolID, values are indices of StateIdx.
+  // Give a non-terminal id, the corresponding half-open range of StateIdx is
+  // [NontermIdx[id], NontermIdx[id+1]).
+  std::vector<uint32_t> NontermIdx;
+  // Similar to NontermIdx, but for terminals.
+  // Index is tok::TokenKind.
+  std::vector<uint32_t> TerminalIdx;
+  // Parallel to Actions, the value is Src state (rows of the matrix).
+  // Grouped by the SymbolID, and only subranges are sorted.
+  std::vector<StateID> StateIdx;
+  // A flat list of available (non-error) actions.
+  // Sorted by (SymbolID, State), concepetually, an action is identified by a
+  // row (State) and a column (SymbolID) in a matrix.
+  std::vector<Action> Actions;
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
+
+} // namespace pseudo
+} // namespace syntax
+} // namespace clang
+
+namespace llvm {
+template <>
+struct DenseMapInfo<clang::syntax::pseudo::LRTable::Builder::Entry> {
+  using Entry = clang::syntax::pseudo::LRTable::Builder::Entry;
+  static inline Entry getEmptyKey() {
+    static Entry E{static_cast<clang::syntax::pseudo::SymbolID>(-1), 0,
+                   clang::syntax::pseudo::LRTable::Action::sentinel()};
+    return E;
+  }
+  static inline Entry getTombstoneKey() {
+    static Entry E{static_cast<clang::syntax::pseudo::SymbolID>(-2), 0,
+                   clang::syntax::pseudo::LRTable::Action::sentinel()};
+    return E;
+  }
+  static unsigned getHashValue(const Entry &I) {
+    return llvm::hash_combine(I.State, I.Symbol, I.Act.value());
+  }
+  static bool isEqual(const Entry &LHS, const Entry &RHS) {
+    return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
+           LHS.Act == RHS.Act;
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_CLANG_TOOLING_SYNTAX_PSEUDO_LRTABLE_H
Index: clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
+++ clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
@@ -174,6 +174,7 @@
   // SymbolID is the index of the table.
   std::vector<Nonterminal> Nonterminals;
 };
+void initTerminals(std::vector<std::string> &Terminals);
 
 } // namespace pseudo
 } // namespace syntax
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to