sstwcw created this revision.
sstwcw added reviewers: MyDeveloperDay, HazardyKnusperkeks, curdeius, owenpan.
Herald added a subscriber: mgorny.
Herald added a project: All.
sstwcw requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This patch mainly handles treating keywords like `begin` as block openers.

`while` and `for` statements will be handled in another patch.

We added an alias field to FormatToken so that a token can be treated as
another token.  This way, when a language uses an alternative symbol, like
the backtick in place of the hash, we can continue using tok::hash in the
code.  Keywords like `begin` cannot use this mechanism, though: left braces
aren't block openers in if statements, while they are in structs and enums.
That means we can't simply treat `begin` and the left brace the same way.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D123450

Files:
  clang/docs/ClangFormat.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatToken.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/QualifierAlignmentFixer.cpp
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/UnwrappedLineFormatter.cpp
  clang/lib/Format/UnwrappedLineParser.cpp
  clang/tools/clang-format/ClangFormat.cpp
  clang/unittests/Format/CMakeLists.txt
  clang/unittests/Format/FormatTestUtils.h
  clang/unittests/Format/FormatTestVerilog.cpp

Index: clang/unittests/Format/FormatTestVerilog.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -0,0 +1,118 @@
+//===- unittest/Format/FormatTestVerilog.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestUtils.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+namespace clang {
+namespace format {
+
+class FormatTestVerilog : public ::testing::Test {
+protected:
+  static std::string format(llvm::StringRef Code, unsigned Offset,
+                            unsigned Length, const FormatStyle &Style) {
+    LLVM_DEBUG(llvm::errs() << "---\n");
+    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
+    std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
+    tooling::Replacements Replaces = reformat(Style, Code, Ranges);
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  static std::string format(llvm::StringRef Code, const FormatStyle &Style) {
+    return format(Code, 0, Code.size(), Style);
+  }
+
+  static void verifyFormat(
+      llvm::StringRef Code,
+      const FormatStyle &Style = getLLVMStyle(FormatStyle::LK_Verilog)) {
+    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
+    EXPECT_EQ(Code.str(),
+              format(test::messUp(Code, /*HandleHash=*/false), Style));
+  }
+};
+
+TEST_F(FormatTestVerilog, If) {
+  verifyFormat("if (x)\n"
+               "  x = x;");
+  verifyFormat("if (x)\n"
+               "  x = x;\n"
+               "x = x;");
+
+  // Test else.
+  verifyFormat("if (x)\n"
+               "  x = x;\n"
+               "else if (x)\n"
+               "  x = x;\n"
+               "else\n"
+               "  x = x;");
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "end else if (x) begin\n"
+               "  x = x;\n"
+               "end else begin\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x else if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x else begin : x\n"
+               "  x = x;\n"
+               "end : x");
+
+  // Test block keywords.
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("if (x) begin : x\n"
+               "  x = x;\n"
+               "end : x");
+  verifyFormat("if (x) begin\n"
+               "  x = x;\n"
+               "  x = x;\n"
+               "end");
+  verifyFormat("disable fork;\n"
+               "x = x;");
+  verifyFormat("rand join x x;\n"
+               "x = x;");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join_any");
+  verifyFormat("if (x) fork\n"
+               "  x = x;\n"
+               "join_none");
+  verifyFormat("if (x) generate\n"
+               "  x = x;\n"
+               "endgenerate");
+  verifyFormat("if (x) generate : x\n"
+               "  x = x;\n"
+               "endgenerate : x");
+
+  // Test that concatenation braces don't get regarded as blocks.
+  verifyFormat("if (x)\n"
+               "  {x} = x;");
+  verifyFormat("if (x)\n"
+               "  x = {x};");
+  verifyFormat("if (x)\n"
+               "  x = {x};\n"
+               "else\n"
+               "  {x} = {x};");
+}
+
+} // namespace format
+} // namespace clang
Index: clang/unittests/Format/FormatTestUtils.h
===================================================================
--- clang/unittests/Format/FormatTestUtils.h
+++ clang/unittests/Format/FormatTestUtils.h
@@ -19,7 +19,10 @@
 namespace format {
 namespace test {
 
-inline std::string messUp(llvm::StringRef Code) {
+// When HandleHash is false, preprocessor directives starting with hash will not
+// be on separate lines.  This is needed because Verilog uses hash for other
+// purposes.
+inline std::string messUp(llvm::StringRef Code, bool HandleHash = true) {
   std::string MessedUp(Code.str());
   bool InComment = false;
   bool InPreprocessorDirective = false;
@@ -29,7 +32,7 @@
       if (JustReplacedNewline)
         MessedUp[i - 1] = '\n';
       InComment = true;
-    } else if (MessedUp[i] == '#' &&
+    } else if (HandleHash && MessedUp[i] == '#' &&
                (JustReplacedNewline || i == 0 || MessedUp[i - 1] == '\n')) {
       if (i != 0)
         MessedUp[i - 1] = '\n';
Index: clang/unittests/Format/CMakeLists.txt
===================================================================
--- clang/unittests/Format/CMakeLists.txt
+++ clang/unittests/Format/CMakeLists.txt
@@ -17,6 +17,7 @@
   FormatTestSelective.cpp
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
+  FormatTestVerilog.cpp
   MacroExpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   QualifierFixerTest.cpp
Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -79,7 +79,18 @@
     "assume-filename",
     cl::desc("Override filename used to determine the language.\n"
              "When reading from stdin, clang-format assumes this\n"
-             "filename to determine the language."),
+             "filename to determine the language.\n"
+             "Unrecognized filenames are treated as C++.\n"
+             "supported:\n"
+             "  CSharp: .cs\n"
+             "  Java: .java\n"
+             "  JavaScript: .mjs .js .ts\n"
+             "  Json: .json\n"
+             "  Objective-C: .m .mm\n"
+             "  Proto: .proto .protodevel\n"
+             "  TableGen: .td\n"
+             "  TextProto: .textpb .pb.txt .textproto .asciipb\n"
+             "  Verilog: .sv .svh .v .vh"),
     cl::init("<stdin>"), cl::cat(ClangFormatCategory));
 
 static cl::opt<bool> Inplace("i",
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -413,7 +413,7 @@
 
 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
   do {
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::l_brace:
       return;
     default:
@@ -432,7 +432,7 @@
 void UnwrappedLineParser::parseCSharpAttribute() {
   int UnpairedSquareBrackets = 1;
   do {
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::r_square:
       nextToken();
       --UnpairedSquareBrackets;
@@ -484,7 +484,7 @@
       nextToken();
       continue;
     }
-    tok::TokenKind kind = FormatTok->Tok.getKind();
+    tok::TokenKind kind = FormatTok->getKind();
     if (FormatTok->getType() == TT_MacroBlockBegin)
       kind = tok::l_brace;
     else if (FormatTok->getType() == TT_MacroBlockEnd)
@@ -604,7 +604,7 @@
       NextTok = Tokens->getNextToken();
     } while (NextTok->is(tok::comment));
 
-    switch (Tok->Tok.getKind()) {
+    switch (Tok->getKind()) {
     case tok::l_brace:
       if (Style.isJavaScript() && PrevTok) {
         if (PrevTok->isOneOf(tok::colon, tok::less))
@@ -756,7 +756,19 @@
                                 bool MunchSemi, bool UnindentWhitesmithsBraces,
                                 bool CanContainBracedList,
                                 TokenType NextLBracesType) {
-  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
+  auto HandleVerilogBlockLabel = [this]() {
+    // ":" name
+    if (Style.Language == FormatStyle::LK_Verilog &&
+        FormatTok->is(tok::colon)) {
+      nextToken();
+      if (Keywords.isVerilogIdentifier(*FormatTok))
+        nextToken();
+    }
+  };
+
+  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
+          (Style.Language == FormatStyle::LK_Verilog &&
+           Keywords.isVerilogBegin(*FormatTok))) &&
          "'{' or macro block token expected");
   FormatToken *Tok = FormatTok;
   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
@@ -771,6 +783,7 @@
 
   unsigned InitialLevel = Line->Level;
   nextToken(/*LevelDifference=*/AddLevels);
+  HandleVerilogBlockLabel();
 
   if (MacroBlock && FormatTok->is(tok::l_paren))
     parseParens();
@@ -822,6 +835,7 @@
 
   // Munch the closing brace.
   nextToken(/*LevelDifference=*/-AddLevels);
+  HandleVerilogBlockLabel();
 
   if (MacroBlock && FormatTok->is(tok::l_paren))
     parseParens();
@@ -893,7 +907,7 @@
 
 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                    const FormatToken &InitialToken) {
-  tok::TokenKind Kind = InitialToken.Tok.getKind();
+  tok::TokenKind Kind = InitialToken.getKind();
   if (InitialToken.is(TT_NamespaceMacro))
     Kind = tok::kw_namespace;
 
@@ -1104,8 +1118,7 @@
   FormatTok->Tok.setKind(tok::identifier);
   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
   nextToken();
-  if (FormatTok->Tok.getKind() == tok::l_paren &&
-      !FormatTok->hasWhitespaceBefore())
+  if (FormatTok->getKind() == tok::l_paren && !FormatTok->hasWhitespaceBefore())
     parseParens();
   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
     Line->Level += PPBranchLevel + 1;
@@ -1332,7 +1345,7 @@
     addUnwrappedLine();
     return;
   }
-  switch (FormatTok->Tok.getKind()) {
+  switch (FormatTok->getKind()) {
   case tok::kw_asm:
     nextToken();
     if (FormatTok->is(tok::l_brace)) {
@@ -1508,7 +1521,7 @@
   }
   do {
     const FormatToken *Previous = FormatTok->Previous;
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::at:
       nextToken();
       if (FormatTok->is(tok::l_brace)) {
@@ -1886,7 +1899,7 @@
     addUnwrappedLine();
   nextToken();
   do {
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::r_brace:
       nextToken();
       if (FormatTok->is(tok::equal)) {
@@ -1948,7 +1961,7 @@
       nextToken();
       continue;
     }
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::l_brace:
       break;
     case tok::l_paren:
@@ -2143,13 +2156,13 @@
         parseChildBlock();
       }
     }
-    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
+    if (FormatTok->getKind() == ClosingBraceKind) {
       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
         addUnwrappedLine();
       nextToken();
       return !HasError;
     }
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::l_square:
       if (Style.isCSharp())
         parseSquare();
@@ -2216,7 +2229,7 @@
   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
   nextToken();
   do {
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::l_paren:
       parseParens();
       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
@@ -2286,7 +2299,7 @@
       return;
   }
   do {
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::l_paren:
       parseParens();
       break;
@@ -2435,7 +2448,7 @@
   FormatToken *IfLeftBrace = nullptr;
   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
 
-  if (FormatTok->is(tok::l_brace)) {
+  if (Keywords.isBlockBegin(*FormatTok, Style)) {
     IfLeftBrace = FormatTok;
     CompoundStatementIndenter Indenter(this, Style, Line->Level);
     IfBlockKind = parseBlock();
@@ -2465,7 +2478,7 @@
     }
     nextToken();
     handleAttributes();
-    if (FormatTok->is(tok::l_brace)) {
+    if (Keywords.isBlockBegin(*FormatTok, Style)) {
       ElseLeftBrace = FormatTok;
       CompoundStatementIndenter Indenter(this, Style, Line->Level);
       if (parseBlock() == IfStmtKind::IfOnly)
@@ -2717,7 +2730,7 @@
                                         bool WrapRightBrace) {
   keepAncestorBraces();
 
-  if (FormatTok->is(tok::l_brace)) {
+  if (Keywords.isBlockBegin(*FormatTok, Style)) {
     FormatToken *LeftBrace = FormatTok;
     CompoundStatementIndenter Indenter(this, Style, Line->Level);
     parseBlock();
@@ -2916,7 +2929,7 @@
     nextToken();
     addUnwrappedLine();
   } else if (!FormatTok->is(tok::coloncolon) &&
-             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
+             !isCOperatorFollowingVar(FormatTok->getKind())) {
     // Not a variable name nor namespace name.
     addUnwrappedLine();
   } else if (AccessSpecifierCandidate) {
@@ -2956,7 +2969,7 @@
   // that we first consume the keyword and check the next token.
   nextToken();
 
-  switch (FormatTok->Tok.getKind()) {
+  switch (FormatTok->getKind()) {
   case tok::l_brace:
     // This can only be an expression, never a clause.
     parseRequiresExpression(RequiresToken);
@@ -2987,7 +3000,7 @@
     return true;
   }
 
-  switch (PreviousNonComment->Tok.getKind()) {
+  switch (PreviousNonComment->getKind()) {
   case tok::greater:
   case tok::r_paren:
   case tok::kw_noexcept:
@@ -3034,7 +3047,7 @@
   int OpenAngles = 0;
 
   for (; NextTokenOffset < 50; PeekNext()) {
-    switch (NextToken->Tok.getKind()) {
+    switch (NextToken->getKind()) {
     case tok::kw_volatile:
     case tok::kw_const:
     case tok::comma:
@@ -3148,7 +3161,7 @@
   do {
     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
 
-    switch (FormatTok->Tok.getKind()) {
+    switch (FormatTok->getKind()) {
     case tok::kw_requires: {
       auto RequiresToken = FormatTok;
       nextToken();
@@ -3252,7 +3265,7 @@
       // ended before that), and basically all other cases. But it's easier to
       // check the other way around.
       assert(FormatTok->Previous);
-      switch (FormatTok->Previous->Tok.getKind()) {
+      switch (FormatTok->Previous->getKind()) {
       case tok::coloncolon:  // Nested identifier.
       case tok::ampamp:      // Start of a function or variable for the
       case tok::pipepipe:    // constraint expression.
@@ -3555,7 +3568,7 @@
   }
 
   auto GetBraceType = [](const FormatToken &RecordTok) {
-    switch (RecordTok.Tok.getKind()) {
+    switch (RecordTok.getKind()) {
     case tok::kw_class:
       return TT_ClassLBrace;
     case tok::kw_struct:
@@ -3984,6 +3997,16 @@
   else
     readTokenWithJavaScriptASI();
   FormatTok->Previous = Previous;
+  if (Style.isVerilog()) {
+    // Blocks in Verilog can have `begin` and `end` instead of braces.  For
+    // keywords like `begin`, we can't treat them the same as left braces
+    // because each context accepts only one of the two.  For example, structs
+    // use braces while if blocks use keywords, and a left brace following an
+    // if condition starts a concatenation, not a block.  For keywords like
+    // `end`, we simply treat them the same as right braces.
+    if (Keywords.isVerilogEnd(*FormatTok))
+      FormatTok->AliasToken = tok::r_brace;
+  }
 }
 
 void UnwrappedLineParser::distributeComments(
Index: clang/lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineFormatter.cpp
+++ clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -425,7 +425,7 @@
       }
     }
     if (PreviousLine && TheLine->First->is(tok::l_brace)) {
-      switch (PreviousLine->First->Tok.getKind()) {
+      switch (PreviousLine->First->getKind()) {
       case tok::at:
         // Don't merge block with left brace wrapped after ObjC special blocks.
         if (PreviousLine->First->Next) {
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -182,7 +182,7 @@
   bool parseUntouchableParens() {
     while (CurrentToken) {
       CurrentToken->Finalized = true;
-      switch (CurrentToken->Tok.getKind()) {
+      switch (CurrentToken->getKind()) {
       case tok::l_paren:
         next();
         if (!parseUntouchableParens())
@@ -573,8 +573,7 @@
          Parent->isUnaryOperator() ||
          // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
-         (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
-          prec::Unknown));
+         (getBinOpPrecedence(Parent->getKind(), true, true) > prec::Unknown));
     bool ColonFound = false;
 
     unsigned BindingIncrease = 1;
@@ -876,7 +875,7 @@
   bool consumeToken() {
     FormatToken *Tok = CurrentToken;
     next();
-    switch (Tok->Tok.getKind()) {
+    switch (Tok->getKind()) {
     case tok::plus:
     case tok::minus:
       if (!Tok->Previous && Line.MustBeDeclaration)
Index: clang/lib/Format/QualifierAlignmentFixer.cpp
===================================================================
--- clang/lib/Format/QualifierAlignmentFixer.cpp
+++ clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -469,7 +469,7 @@
 bool LeftRightQualifierAlignmentFixer::isQualifierOrType(
     const FormatToken *Tok, const std::vector<tok::TokenKind> &specifiedTypes) {
   return Tok && (Tok->isSimpleTypeSpecifier() || Tok->is(tok::kw_auto) ||
-                 llvm::is_contained(specifiedTypes, Tok->Tok.getKind()));
+                 llvm::is_contained(specifiedTypes, Tok->getKind()));
 }
 
 // If a token is an identifier and it's upper case, it could
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -369,6 +369,10 @@
   }
   bool isTypeFinalized() const { return TypeIsFinalized; }
 
+  /// Used to treat a token as if it were something else.  For example, in
+  /// Verilog we want to treat the backtick like a hash.
+  tok::TokenKind AliasToken = tok::unknown;
+
   /// The number of newlines immediately before the \c Token.
   ///
   /// This can be used to determine what the user wrote in the original code
@@ -498,7 +502,10 @@
   // in a configured macro expansion.
   llvm::Optional<MacroExpansion> MacroCtx;
 
-  bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
+  bool is(tok::TokenKind Kind) const {
+    // We don't use `Tok.is` here because it doesn't consider the alias.
+    return getKind() == Kind;
+  }
   bool is(TokenType TT) const { return getType() == TT; }
   bool is(const IdentifierInfo *II) const {
     return II && II == Tok.getIdentifierInfo();
@@ -519,6 +526,10 @@
   }
   template <typename T> bool isNot(T Kind) const { return !is(Kind); }
 
+  tok::TokenKind getKind() const {
+    return AliasToken == tok::unknown ? Tok.getKind() : AliasToken;
+  }
+
   bool isIf(bool AllowConstexprMacro = true) const {
     return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
            (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
@@ -574,7 +585,7 @@
     return endsSequenceInternal(K1, Tokens...);
   }
 
-  bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
+  bool isStringLiteral() const { return tok::isStringLiteral(getKind()); }
 
   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
     return Tok.isObjCAtKeyword(Kind);
@@ -634,7 +645,7 @@
   }
 
   bool isUnaryOperator() const {
-    switch (Tok.getKind()) {
+    switch (getKind()) {
     case tok::plus:
     case tok::plusplus:
     case tok::minus:
@@ -662,7 +673,7 @@
   /// Returns \c true if this is a keyword that can be used
   /// like a function call (e.g. sizeof, typeid, ...).
   bool isFunctionLikeKeyword() const {
-    switch (Tok.getKind()) {
+    switch (getKind()) {
     case tok::kw_throw:
     case tok::kw_typeid:
     case tok::kw_return:
@@ -713,7 +724,7 @@
   }
 
   prec::Level getPrecedence() const {
-    return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
+    return getBinOpPrecedence(getKind(), /*GreaterThanIsOperator=*/true,
                               /*CPlusPlus11=*/true);
   }
 
@@ -1003,6 +1014,118 @@
     kw_when = &IdentTable.get("when");
     kw_where = &IdentTable.get("where");
 
+    kw_always = &IdentTable.get("always");
+    kw_always_comb = &IdentTable.get("always_comb");
+    kw_always_ff = &IdentTable.get("always_ff");
+    kw_always_latch = &IdentTable.get("always_latch");
+    kw_assign = &IdentTable.get("assign");
+    kw_assume = &IdentTable.get("assume");
+    kw_automatic = &IdentTable.get("automatic");
+    kw_before = &IdentTable.get("before");
+    kw_begin = &IdentTable.get("begin");
+    kw_bins = &IdentTable.get("bins");
+    kw_binsof = &IdentTable.get("binsof");
+    kw_casex = &IdentTable.get("casex");
+    kw_casez = &IdentTable.get("casez");
+    kw_celldefine = &IdentTable.get("celldefine");
+    kw_checker = &IdentTable.get("checker");
+    kw_clocking = &IdentTable.get("clocking");
+    kw_constraint = &IdentTable.get("constraint");
+    kw_cover = &IdentTable.get("cover");
+    kw_covergroup = &IdentTable.get("covergroup");
+    kw_coverpoint = &IdentTable.get("coverpoint");
+    kw_disable = &IdentTable.get("disable");
+    kw_dist = &IdentTable.get("dist");
+    kw_end = &IdentTable.get("end");
+    kw_endcase = &IdentTable.get("endcase");
+    kw_endchecker = &IdentTable.get("endchecker");
+    kw_endclass = &IdentTable.get("endclass");
+    kw_endclocking = &IdentTable.get("endclocking");
+    kw_endfunction = &IdentTable.get("endfunction");
+    kw_endgenerate = &IdentTable.get("endgenerate");
+    kw_endgroup = &IdentTable.get("endgroup");
+    kw_endinterface = &IdentTable.get("endinterface");
+    kw_endmodule = &IdentTable.get("endmodule");
+    kw_endpackage = &IdentTable.get("endpackage");
+    kw_endprimitive = &IdentTable.get("endprimitive");
+    kw_endprogram = &IdentTable.get("endprogram");
+    kw_endproperty = &IdentTable.get("endproperty");
+    kw_endsequence = &IdentTable.get("endsequence");
+    kw_endspecify = &IdentTable.get("endspecify");
+    kw_endtable = &IdentTable.get("endtable");
+    kw_endtask = &IdentTable.get("endtask");
+    kw_forever = &IdentTable.get("forever");
+    kw_fork = &IdentTable.get("fork");
+    kw_generate = &IdentTable.get("generate");
+    kw_highz0 = &IdentTable.get("highz0");
+    kw_highz1 = &IdentTable.get("highz1");
+    kw_iff = &IdentTable.get("iff");
+    kw_ifnone = &IdentTable.get("ifnone");
+    kw_ignore_bins = &IdentTable.get("ignore_bins");
+    kw_illegal_bins = &IdentTable.get("illegal_bins");
+    kw_initial = &IdentTable.get("initial");
+    kw_inout = &IdentTable.get("inout");
+    kw_input = &IdentTable.get("input");
+    kw_inside = &IdentTable.get("inside");
+    kw_interconnect = &IdentTable.get("interconnect");
+    kw_intersect = &IdentTable.get("intersect");
+    kw_join = &IdentTable.get("join");
+    kw_join_any = &IdentTable.get("join_any");
+    kw_join_none = &IdentTable.get("join_none");
+    kw_large = &IdentTable.get("large");
+    kw_local = &IdentTable.get("local");
+    kw_localparam = &IdentTable.get("localparam");
+    kw_macromodule = &IdentTable.get("macromodule");
+    kw_matches = &IdentTable.get("matches");
+    kw_medium = &IdentTable.get("medium");
+    kw_output = &IdentTable.get("output");
+    kw_packed = &IdentTable.get("packed");
+    kw_parameter = &IdentTable.get("parameter");
+    kw_primitive = &IdentTable.get("primitive");
+    kw_priority = &IdentTable.get("priority");
+    kw_program = &IdentTable.get("program");
+    kw_property = &IdentTable.get("property");
+    kw_pull0 = &IdentTable.get("pull0");
+    kw_pull1 = &IdentTable.get("pull1");
+    kw_pure = &IdentTable.get("pure");
+    kw_rand = &IdentTable.get("rand");
+    kw_randc = &IdentTable.get("randc");
+    kw_randcase = &IdentTable.get("randcase");
+    kw_randsequence = &IdentTable.get("randsequence");
+    kw_repeat = &IdentTable.get("repeat");
+    kw_sample = &IdentTable.get("sample");
+    kw_scalared = &IdentTable.get("scalared");
+    kw_sequence = &IdentTable.get("sequence");
+    kw_small = &IdentTable.get("small");
+    kw_soft = &IdentTable.get("soft");
+    kw_solve = &IdentTable.get("solve");
+    kw_specify = &IdentTable.get("specify");
+    kw_specparam = &IdentTable.get("specparam");
+    kw_strong0 = &IdentTable.get("strong0");
+    kw_strong1 = &IdentTable.get("strong1");
+    kw_supply0 = &IdentTable.get("supply0");
+    kw_supply1 = &IdentTable.get("supply1");
+    kw_table = &IdentTable.get("table");
+    kw_tagged = &IdentTable.get("tagged");
+    kw_task = &IdentTable.get("task");
+    kw_tri = &IdentTable.get("tri");
+    kw_tri0 = &IdentTable.get("tri0");
+    kw_tri1 = &IdentTable.get("tri1");
+    kw_triand = &IdentTable.get("triand");
+    kw_trior = &IdentTable.get("trior");
+    kw_trireg = &IdentTable.get("trireg");
+    kw_unique = &IdentTable.get("unique");
+    kw_unique0 = &IdentTable.get("unique0");
+    kw_uwire = &IdentTable.get("uwire");
+    kw_vectored = &IdentTable.get("vectored");
+    kw_wand = &IdentTable.get("wand");
+    kw_weak0 = &IdentTable.get("weak0");
+    kw_weak1 = &IdentTable.get("weak1");
+    kw_wildcard = &IdentTable.get("wildcard");
+    kw_wire = &IdentTable.get("wire");
+    kw_with = &IdentTable.get("with");
+    kw_wor = &IdentTable.get("wor");
+
     // Keep this at the end of the constructor to make sure everything here
     // is
     // already initialized.
@@ -1026,6 +1149,42 @@
          kw_set, kw_type, kw_typeof, kw_var, kw_yield,
          // Keywords from the Java section.
          kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
+
+    // Some keywords are not included here, either because they don't need
+    // special treatment, like `showcancelled`, or because they should be
+    // treated as identifiers, like `int` and `logic`.
+    VerilogExtraKeywords = std::unordered_set<IdentifierInfo *>(
+        {kw_always,       kw_always_comb,  kw_always_ff,    kw_always_latch,
+         kw_assert,       kw_assign,       kw_assume,       kw_automatic,
+         kw_before,       kw_begin,        kw_bins,         kw_binsof,
+         kw_casex,        kw_casez,        kw_celldefine,   kw_checker,
+         kw_clocking,     kw_constraint,   kw_cover,        kw_covergroup,
+         kw_coverpoint,   kw_disable,      kw_dist,         kw_end,
+         kw_endcase,      kw_endchecker,   kw_endclass,     kw_endclocking,
+         kw_endfunction,  kw_endgenerate,  kw_endgroup,     kw_endinterface,
+         kw_endmodule,    kw_endpackage,   kw_endprimitive, kw_endprogram,
+         kw_endproperty,  kw_endsequence,  kw_endspecify,   kw_endtable,
+         kw_endtask,      kw_extends,      kw_final,        kw_foreach,
+         kw_forever,      kw_fork,         kw_function,     kw_generate,
+         kw_highz0,       kw_highz1,       kw_iff,          kw_ifnone,
+         kw_ignore_bins,  kw_illegal_bins, kw_implements,   kw_import,
+         kw_initial,      kw_inout,        kw_input,        kw_inside,
+         kw_interconnect, kw_interface,    kw_intersect,    kw_join,
+         kw_join_any,     kw_join_none,    kw_large,        kw_let,
+         kw_local,        kw_localparam,   kw_macromodule,  kw_matches,
+         kw_medium,       kw_output,       kw_package,      kw_packed,
+         kw_parameter,    kw_primitive,    kw_priority,     kw_program,
+         kw_property,     kw_pull0,        kw_pull1,        kw_pure,
+         kw_rand,         kw_randc,        kw_randcase,     kw_randsequence,
+         kw_ref,          kw_repeat,       kw_sample,       kw_scalared,
+         kw_sequence,     kw_small,        kw_soft,         kw_solve,
+         kw_specify,      kw_specparam,    kw_strong0,      kw_strong1,
+         kw_supply0,      kw_supply1,      kw_table,        kw_tagged,
+         kw_task,         kw_tri,          kw_tri0,         kw_tri1,
+         kw_triand,       kw_trior,        kw_trireg,       kw_unique,
+         kw_unique0,      kw_uwire,        kw_var,          kw_vectored,
+         kw_wand,         kw_weak0,        kw_weak1,        kw_wildcard,
+         kw_wire,         kw_with,         kw_wor});
   }
 
   // Context sensitive keywords.
@@ -1130,6 +1289,119 @@
   IdentifierInfo *kw_when;
   IdentifierInfo *kw_where;
 
+  // Verilog keywords
+  IdentifierInfo *kw_always;
+  IdentifierInfo *kw_always_comb;
+  IdentifierInfo *kw_always_ff;
+  IdentifierInfo *kw_always_latch;
+  IdentifierInfo *kw_assign;
+  IdentifierInfo *kw_assume;
+  IdentifierInfo *kw_automatic;
+  IdentifierInfo *kw_before;
+  IdentifierInfo *kw_begin;
+  IdentifierInfo *kw_bins;
+  IdentifierInfo *kw_binsof;
+  IdentifierInfo *kw_casex;
+  IdentifierInfo *kw_casez;
+  IdentifierInfo *kw_celldefine;
+  IdentifierInfo *kw_checker;
+  IdentifierInfo *kw_clocking;
+  IdentifierInfo *kw_constraint;
+  IdentifierInfo *kw_cover;
+  IdentifierInfo *kw_covergroup;
+  IdentifierInfo *kw_coverpoint;
+  IdentifierInfo *kw_disable;
+  IdentifierInfo *kw_dist;
+  IdentifierInfo *kw_end;
+  IdentifierInfo *kw_endcase;
+  IdentifierInfo *kw_endchecker;
+  IdentifierInfo *kw_endclass;
+  IdentifierInfo *kw_endclocking;
+  IdentifierInfo *kw_endfunction;
+  IdentifierInfo *kw_endgenerate;
+  IdentifierInfo *kw_endgroup;
+  IdentifierInfo *kw_endinterface;
+  IdentifierInfo *kw_endmodule;
+  IdentifierInfo *kw_endpackage;
+  IdentifierInfo *kw_endprimitive;
+  IdentifierInfo *kw_endprogram;
+  IdentifierInfo *kw_endproperty;
+  IdentifierInfo *kw_endsequence;
+  IdentifierInfo *kw_endspecify;
+  IdentifierInfo *kw_endtable;
+  IdentifierInfo *kw_endtask;
+  IdentifierInfo *kw_forever;
+  IdentifierInfo *kw_fork;
+  IdentifierInfo *kw_generate;
+  IdentifierInfo *kw_highz0;
+  IdentifierInfo *kw_highz1;
+  IdentifierInfo *kw_iff;
+  IdentifierInfo *kw_ifnone;
+  IdentifierInfo *kw_ignore_bins;
+  IdentifierInfo *kw_illegal_bins;
+  IdentifierInfo *kw_initial;
+  IdentifierInfo *kw_inout;
+  IdentifierInfo *kw_input;
+  IdentifierInfo *kw_inside;
+  IdentifierInfo *kw_interconnect;
+  IdentifierInfo *kw_intersect;
+  IdentifierInfo *kw_join;
+  IdentifierInfo *kw_join_any;
+  IdentifierInfo *kw_join_none;
+  IdentifierInfo *kw_large;
+  IdentifierInfo *kw_local;
+  IdentifierInfo *kw_localparam;
+  IdentifierInfo *kw_macromodule;
+  IdentifierInfo *kw_matches;
+  IdentifierInfo *kw_medium;
+  IdentifierInfo *kw_output;
+  IdentifierInfo *kw_packed;
+  IdentifierInfo *kw_parameter;
+  IdentifierInfo *kw_primitive;
+  IdentifierInfo *kw_priority;
+  IdentifierInfo *kw_program;
+  IdentifierInfo *kw_property;
+  IdentifierInfo *kw_pull0;
+  IdentifierInfo *kw_pull1;
+  IdentifierInfo *kw_pure;
+  IdentifierInfo *kw_rand;
+  IdentifierInfo *kw_randc;
+  IdentifierInfo *kw_randcase;
+  IdentifierInfo *kw_randsequence;
+  IdentifierInfo *kw_repeat;
+  IdentifierInfo *kw_sample;
+  IdentifierInfo *kw_scalared;
+  IdentifierInfo *kw_sequence;
+  IdentifierInfo *kw_small;
+  IdentifierInfo *kw_soft;
+  IdentifierInfo *kw_solve;
+  IdentifierInfo *kw_specify;
+  IdentifierInfo *kw_specparam;
+  IdentifierInfo *kw_strong0;
+  IdentifierInfo *kw_strong1;
+  IdentifierInfo *kw_supply0;
+  IdentifierInfo *kw_supply1;
+  IdentifierInfo *kw_table;
+  IdentifierInfo *kw_tagged;
+  IdentifierInfo *kw_task;
+  IdentifierInfo *kw_tri;
+  IdentifierInfo *kw_tri0;
+  IdentifierInfo *kw_tri1;
+  IdentifierInfo *kw_triand;
+  IdentifierInfo *kw_trior;
+  IdentifierInfo *kw_trireg;
+  IdentifierInfo *kw_unique;
+  IdentifierInfo *kw_unique0;
+  IdentifierInfo *kw_uwire;
+  IdentifierInfo *kw_vectored;
+  IdentifierInfo *kw_wand;
+  IdentifierInfo *kw_weak0;
+  IdentifierInfo *kw_weak1;
+  IdentifierInfo *kw_wildcard;
+  IdentifierInfo *kw_wire;
+  IdentifierInfo *kw_with;
+  IdentifierInfo *kw_wor;
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   /// If \c AcceptIdentifierName is true, returns true not only for keywords,
@@ -1139,7 +1411,7 @@
                               bool AcceptIdentifierName = true) const {
     // Based on the list of JavaScript & TypeScript keywords here:
     // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74
-    switch (Tok.Tok.getKind()) {
+    switch (Tok.getKind()) {
     case tok::kw_break:
     case tok::kw_case:
     case tok::kw_catch:
@@ -1187,7 +1459,7 @@
       break;
     }
 
-    switch (Tok.Tok.getKind()) {
+    switch (Tok.getKind()) {
       // Handle C++ keywords not included above: these are all JS identifiers.
 #define KEYWORD(X, Y) case tok::kw_##X:
 #include "clang/Basic/TokenKinds.def"
@@ -1203,7 +1475,7 @@
   /// Returns \c true if \p Tok is a C# keyword, returns
   /// \c false if it is a anything else.
   bool isCSharpKeyword(const FormatToken &Tok) const {
-    switch (Tok.Tok.getKind()) {
+    switch (Tok.getKind()) {
     case tok::kw_bool:
     case tok::kw_break:
     case tok::kw_case:
@@ -1256,12 +1528,69 @@
     }
   }
 
+  /// Whether \p Tok is an identifier, rather than a keyword, in Verilog.
+  bool isVerilogIdentifier(const FormatToken &Tok) const {
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_case:
+    case tok::kw_class:
+    case tok::kw_const:
+    case tok::kw_continue:
+    case tok::kw_default:
+    case tok::kw_do:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_extern:
+    case tok::kw_for:
+    case tok::kw_if:
+    case tok::kw_restrict:
+    case tok::kw_signed:
+    case tok::kw_static:
+    case tok::kw_struct:
+    case tok::kw_typedef:
+    case tok::kw_union:
+    case tok::kw_unsigned:
+    case tok::kw_virtual:
+    case tok::kw_while:
+      return false; // These clang keywords are Verilog keywords too.
+    case tok::identifier:
+      return VerilogExtraKeywords.count(Tok.Tok.getIdentifierInfo()) == 0;
+    default:
+      // getIdentifierInfo returns non-null for both identifiers and keywords.
+      return Tok.Tok.getIdentifierInfo() != nullptr;
+    }
+  }
+
+  /// Returns whether \p Tok is a Verilog keyword that opens a block. `table`
+  /// is excluded as it needs special treatment; so is `fork` after `disable`.
+  bool isVerilogBegin(const FormatToken &Tok) const {
+    const bool Opens = Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify);
+    return Opens && !Tok.endsSequence(kw_fork, kw_disable);
+  }
+
+  /// Returns whether \p Tok is a Verilog block closer; `rand join` is not one.
+  bool isVerilogEnd(const FormatToken &Tok) const {
+    return !Tok.endsSequence(kw_join, kw_rand) &&
+           Tok.isOneOf(kw_end, kw_join, kw_join_any, kw_join_none, kw_endcase,
+                       kw_endclass, kw_endclocking, kw_endchecker,
+                       kw_endfunction, kw_endgenerate, kw_endgroup,
+                       kw_endinterface, kw_endmodule, kw_endpackage,
+                       kw_endprimitive, kw_endprogram, kw_endproperty,
+                       kw_endsequence, kw_endspecify, kw_endtable, kw_endtask);
+  }
+
+  /// Whether \p Tok begins a block: isVerilogBegin() for Verilog, else `{`.
+  bool isBlockBegin(const FormatToken &Tok, const FormatStyle &Style) const {
+    return Style.isVerilog() ? isVerilogBegin(Tok) : Tok.is(tok::l_brace);
+  }
+
 private:
   /// The JavaScript keywords beyond the C++ keyword set.
   std::unordered_set<IdentifierInfo *> JsExtraKeywords;
 
   /// The C# keywords beyond the C++ keyword set
   std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
+
+  std::unordered_set<IdentifierInfo *> VerilogExtraKeywords;
 };
 
 } // namespace format
Index: clang/lib/Format/FormatToken.cpp
===================================================================
--- clang/lib/Format/FormatToken.cpp
+++ clang/lib/Format/FormatToken.cpp
@@ -37,7 +37,7 @@
 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
 // duplication.
 bool FormatToken::isSimpleTypeSpecifier() const {
-  switch (Tok.getKind()) {
+  switch (getKind()) {
   case tok::kw_short:
   case tok::kw_long:
   case tok::kw___int64:
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -3423,6 +3423,11 @@
     return FormatStyle::LK_CSharp;
   if (FileName.endswith_insensitive(".json"))
     return FormatStyle::LK_Json;
+  if (FileName.endswith_insensitive(".sv") ||
+      FileName.endswith_insensitive(".svh") ||
+      FileName.endswith_insensitive(".v") ||
+      FileName.endswith_insensitive(".vh"))
+    return FormatStyle::LK_Verilog;
   return FormatStyle::LK_Cpp;
 }
 
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2589,12 +2589,17 @@
     LK_TableGen,
     /// Should be used for Protocol Buffer messages in text format
     /// (https://developers.google.com/protocol-buffers/).
-    LK_TextProto
+    LK_TextProto,
+    /// Should be used for Verilog and SystemVerilog.
+    /// https://standards.ieee.org/ieee/1800/6700/
+    /// https://doi.org/10.1109/IEEESTD.2018.8299595
+    LK_Verilog
   };
   bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
   bool isCSharp() const { return Language == LK_CSharp; }
   bool isJson() const { return Language == LK_Json; }
   bool isJavaScript() const { return Language == LK_JavaScript; }
+  bool isVerilog() const { return Language == LK_Verilog; }
 
   /// Language, this format style is targeted at.
   /// \version 3.5
@@ -4285,6 +4290,8 @@
     return "TableGen";
   case FormatStyle::LK_TextProto:
     return "TextProto";
+  case FormatStyle::LK_Verilog:
+    return "Verilog";
   default:
     return "Unknown";
   }
Index: clang/docs/ClangFormat.rst
===================================================================
--- clang/docs/ClangFormat.rst
+++ clang/docs/ClangFormat.rst
@@ -43,6 +43,17 @@
     --assume-filename=<string>     - Override filename used to determine the language.
                                      When reading from stdin, clang-format assumes this
                                      filename to determine the language.
+                                     Unrecognized filenames are treated as C++.
+                                     supported:
+                                       CSharp: .cs
+                                       Java: .java
+                                       JavaScript: .mjs .js .ts
+                                       Json: .json
+                                       Objective-C: .m .mm
+                                       Proto: .proto .protodevel
+                                       TableGen: .td
+                                       TextProto: .textpb .pb.txt .textproto .asciipb
+                                       Verilog: .sv .svh .v .vh
     --cursor=<uint>                - The position of the cursor when invoking
                                      clang-format from an editor integration
     --dry-run                      - If set, do not actually make the formatting changes
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to