sstwcw created this revision.
sstwcw added reviewers: HazardyKnusperkeks, MyDeveloperDay, curdeius, owenpan.
Herald added a project: All.
sstwcw requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126845

Files:
  clang/lib/Format/FormatToken.h
  clang/lib/Format/FormatTokenLexer.cpp
  clang/lib/Format/FormatTokenLexer.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/unittests/Format/FormatTestVerilog.cpp

Index: clang/unittests/Format/FormatTestVerilog.cpp
===================================================================
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -45,6 +45,27 @@
   }
 };
 
+TEST_F(FormatTestVerilog, BasedLiteral) {
+  verifyFormat("x = '0;");
+  verifyFormat("x = '1;");
+  verifyFormat("x = 'X;");
+  verifyFormat("x = 'x;");
+  verifyFormat("x = 'Z;");
+  verifyFormat("x = 'z;");
+  verifyFormat("x = 659;");
+  verifyFormat("x = 'h837ff;");
+  verifyFormat("x = 'o7460;");
+  verifyFormat("x = 4'b1001;");
+  verifyFormat("x = 5'D3;");
+  verifyFormat("x = 3'b01x;");
+  verifyFormat("x = 12'hx;");
+  verifyFormat("x = 16'hz;");
+  verifyFormat("x = -8'd6;");
+  verifyFormat("x = 4'shf;");
+  verifyFormat("x = -4'sd15;");
+  verifyFormat("x = 16'sd?;");
+}
+
 TEST_F(FormatTestVerilog, Delay) {
   // Delay by the default unit.
   verifyFormat("#0;");
@@ -139,6 +160,64 @@
                "  {x} = {x};");
 }
 
+TEST_F(FormatTestVerilog, Operators) {
+  // Test that unary operators are not followed by a space.
+  verifyFormat("x = +x;");
+  verifyFormat("x = -x;");
+  verifyFormat("x = !x;");
+  verifyFormat("x = ~x;");
+  verifyFormat("x = &x;");
+  verifyFormat("x = ~&x;");
+  verifyFormat("x = |x;");
+  verifyFormat("x = ~|x;");
+  verifyFormat("x = ^x;");
+  verifyFormat("x = ~^x;");
+  verifyFormat("x = ^~x;");
+  verifyFormat("x = ++x;");
+  verifyFormat("x = --x;");
+
+  // Test that operators don't get split.
+  verifyFormat("x = x++;");
+  verifyFormat("x = x--;");
+  verifyFormat("x = x ** x;");
+  verifyFormat("x = x << x;");
+  verifyFormat("x = x >> x;");
+  verifyFormat("x = x <<< x;");
+  verifyFormat("x = x >>> x;");
+  verifyFormat("x = x <= x;");
+  verifyFormat("x = x >= x;");
+  verifyFormat("x = x == x;");
+  verifyFormat("x = x != x;");
+  verifyFormat("x = x === x;");
+  verifyFormat("x = x !== x;");
+  verifyFormat("x = x ==? x;");
+  verifyFormat("x = x !=? x;");
+  verifyFormat("x = x ~^ x;");
+  verifyFormat("x = x ^~ x;");
+  verifyFormat("x = x && x;");
+  verifyFormat("x = x || x;");
+  verifyFormat("x = x->x;");
+  verifyFormat("x = x <-> x;");
+  verifyFormat("x += x;");
+  verifyFormat("x -= x;");
+  verifyFormat("x *= x;");
+  verifyFormat("x /= x;");
+  verifyFormat("x %= x;");
+  verifyFormat("x &= x;");
+  verifyFormat("x ^= x;");
+  verifyFormat("x |= x;");
+  verifyFormat("x <<= x;");
+  verifyFormat("x >>= x;");
+  verifyFormat("x <<<= x;");
+  verifyFormat("x >>>= x;");
+  verifyFormat("x <= x;");
+
+  // Test that space is added between operators.
+  EXPECT_EQ("x = x < -x;", format("x=x<-x;"));
+  EXPECT_EQ("x = x << -x;", format("x=x<<-x;"));
+  EXPECT_EQ("x = x <<< -x;", format("x=x<<<-x;"));
+}
+
 TEST_F(FormatTestVerilog, Preprocessor) {
   auto Style = getLLVMStyle(FormatStyle::LK_Verilog);
   Style.ColumnLimit = 20;
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -1841,7 +1841,8 @@
           Current,
           Contexts.back().CanBeExpression && Contexts.back().IsExpression,
           Contexts.back().ContextType == Context::TemplateArgument));
-    } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
+    } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
+               (Style.isVerilog() && Current.is(tok::pipe))) {
       Current.setType(determinePlusMinusCaretUsage(Current));
       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
         Contexts.back().CaretFound = true;
@@ -3954,6 +3955,21 @@
          (Left.is(tok::r_paren) && Left.MatchingParen &&
           Left.MatchingParen->endsSequence(tok::l_paren, tok::at))))
       return true;
+    // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
+    // literal like `'{}`.
+    if (Left.is(Keywords.quote) ||
+        (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant)))
+      return false;
+    // Don't add spaces between a casting type and the quote, or between a
+    // repetition count and the brace.
+    if ((Right.is(Keywords.quote) ||
+         (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
+        !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
+          Keywords.isVerilogWordOperator(Left)) &&
+        (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
+                      tok::numeric_constant) ||
+         Keywords.isWordLike(Left)))
+      return false;
   }
   if (Left.is(TT_ImplicitStringLiteral))
     return Right.hasWhitespaceBefore();
Index: clang/lib/Format/FormatTokenLexer.h
===================================================================
--- clang/lib/Format/FormatTokenLexer.h
+++ clang/lib/Format/FormatTokenLexer.h
@@ -60,7 +60,14 @@
   bool tryMergeForEach();
   bool tryTransformTryUsageForC();
 
+  // Merge the most recently lexed tokens into a single token if their kinds
+  // match Kinds.
   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+  // Merge without checking their kinds.
+  bool tryMergeTokens(size_t Count, TokenType NewType);
+  // Merge if their kinds match any one of Kinds.
+  bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
+                         TokenType NewType);
 
   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
   bool precedesOperand(FormatToken *Tok);
Index: clang/lib/Format/FormatTokenLexer.cpp
===================================================================
--- clang/lib/Format/FormatTokenLexer.cpp
+++ clang/lib/Format/FormatTokenLexer.cpp
@@ -193,6 +193,75 @@
     if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
       return;
   }
+
+  if (Style.isVerilog()) {
+    // Merge the number following a base like `'h?a0`.
+    if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
+        Tokens.end()[-2]->is(tok::numeric_constant) &&
+        Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
+                               tok::question) &&
+        tryMergeTokens(2, TT_Unknown))
+      return;
+    // Part select.
+    if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
+                          TT_BitFieldColon))
+      return;
+    // Xnor. The combined token is treated as a caret which can also be either a
+    // unary or binary operator. The actual type is determined in
+    // TokenAnnotator. We also check the token length so we know it is not
+    // already a merged token.
+    if (Tokens.back()->TokenText.size() == 1 &&
+        tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
+                          TT_BinaryOperator)) {
+      Tokens.back()->Tok.setKind(tok::caret);
+      return;
+    }
+    // Signed shift and distribution weight.
+    if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
+      Tokens.back()->Tok.setKind(tok::lessless);
+      return;
+    }
+    if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
+      Tokens.back()->Tok.setKind(tok::greatergreater);
+      return;
+    }
+    if (tryMergeTokensAny({{tok::lessless, tok::equal},
+                           {tok::lessless, tok::lessequal},
+                           {tok::greatergreater, tok::equal},
+                           {tok::greatergreater, tok::greaterequal},
+                           {tok::colon, tok::equal},
+                           {tok::colon, tok::slash}},
+                          TT_BinaryOperator)) {
+      Tokens.back()->ForcedPrecedence = prec::Assignment;
+      return;
+    }
+    // Exponentiation, signed shift, case equality, and wildcard equality.
+    if (tryMergeTokensAny({{tok::star, tok::star},
+                           {tok::lessless, tok::less},
+                           {tok::greatergreater, tok::greater},
+                           {tok::exclaimequal, tok::equal},
+                           {tok::exclaimequal, tok::question},
+                           {tok::equalequal, tok::equal},
+                           {tok::equalequal, tok::question}},
+                          TT_BinaryOperator))
+      return;
+    // Module paths in specify blocks and implications in properties.
+    if (tryMergeTokensAny({{tok::plusequal, tok::greater},
+                           {tok::plus, tok::star, tok::greater},
+                           {tok::minusequal, tok::greater},
+                           {tok::minus, tok::star, tok::greater},
+                           {tok::less, tok::arrow},
+                           {tok::equal, tok::greater},
+                           {tok::star, tok::greater},
+                           {tok::pipeequal, tok::greater},
+                           {tok::pipe, tok::arrow},
+                           {tok::hash, tok::minus, tok::hash},
+                           {tok::hash, tok::equal, tok::hash}},
+                          TT_BinaryOperator)) {
+      Tokens.back()->ForcedPrecedence = prec::Comma;
+      return;
+    }
+  }
 }
 
 bool FormatTokenLexer::tryMergeNSStringLiteral() {
@@ -461,15 +530,28 @@
 
   SmallVectorImpl<FormatToken *>::const_iterator First =
       Tokens.end() - Kinds.size();
-  if (!First[0]->is(Kinds[0]))
+  for (unsigned i = 0; i < Kinds.size(); ++i)
+    if (!First[i]->is(Kinds[i]))
+      return false;
+
+  return tryMergeTokens(Kinds.size(), NewType);
+}
+
+bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {
+  if (Tokens.size() < Count)
     return false;
+
+  SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Count;
   unsigned AddLength = 0;
-  for (unsigned i = 1; i < Kinds.size(); ++i) {
-    if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore())
+  for (size_t i = 1; i < Count; ++i) {
+    // If there is whitespace separating the token and the previous one,
+    // they should not be merged.
+    if (First[i]->hasWhitespaceBefore())
       return false;
     AddLength += First[i]->TokenText.size();
   }
-  Tokens.resize(Tokens.size() - Kinds.size() + 1);
+
+  Tokens.resize(Tokens.size() - Count + 1);
   First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                   First[0]->TokenText.size() + AddLength);
   First[0]->ColumnWidth += AddLength;
@@ -477,6 +559,14 @@
   return true;
 }
 
+bool FormatTokenLexer::tryMergeTokensAny(
+    ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) {
+  return std::any_of(Kinds.begin(), Kinds.end(),
+                     [this, NewType](ArrayRef<tok::TokenKind> Kinds) {
+                       return tryMergeTokens(Kinds, NewType);
+                     });
+}
+
 // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
 bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
   // NB: This is not entirely correct, as an r_paren can introduce an operand
@@ -988,12 +1078,19 @@
   }
 
   if (Style.isVerilog()) {
+    static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);
+    SmallVector<StringRef, 1> Matches;
     // Verilog uses the backtick instead of the hash for preprocessor stuff.
     // And it uses the hash for delays and parameter lists. In order to continue
     // using `tok::hash` in other places, the backtick gets marked as the hash
     // here.  And in order to tell the backtick and hash apart for
     // Verilog-specific stuff, the hash becomes an identifier.
-    if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
+    if (FormatTok->is(tok::numeric_constant)) {
+      // In Verilog the quote is not part of a number.
+      auto Quote = FormatTok->TokenText.find('\'');
+      if (Quote != StringRef::npos)
+        resizeToken(Quote);
+    } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
       FormatTok->Tok.setKind(tok::raw_identifier);
     } else if (FormatTok->is(tok::raw_identifier)) {
       if (FormatTok->TokenText == "`") {
@@ -1002,6 +1099,14 @@
       } else if (FormatTok->TokenText == "``") {
         FormatTok->Tok.setIdentifierInfo(nullptr);
         FormatTok->Tok.setKind(tok::hashhash);
+      } else if (Tokens.size() != 0 && Tokens.back()->is(Keywords.quote) &&
+                 NumberBase.match(FormatTok->TokenText, &Matches)) {
+        // In Verilog, in a based number literal like `'b10`, there may be
+        // whitespace between `'b` and `10`. Therefore we handle the base and
+        // the rest of the number literal as two tokens. But if there is no
+        // space in the input code, we need to manually separate the two parts.
+        resizeToken(Matches[0].size());
+        FormatTok->setFinalizedType(TT_VerilogNumberBase);
       }
     }
   }
@@ -1044,6 +1149,12 @@
     StateStack.push(LexerState::TOKEN_STASHED);
   }
 
+  if (Style.isVerilog() && Tokens.size() != 0 &&
+      Tokens.back()->is(TT_VerilogNumberBase) &&
+      FormatTok->Tok.isOneOf(tok::identifier, tok::question))
+    // Mark the number following a base like `'h?a0` as a number.
+    FormatTok->Tok.setKind(tok::numeric_constant);
+
   // Now FormatTok is the next non-whitespace token.
 
   StringRef Text = FormatTok->TokenText;
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -135,6 +135,8 @@
   TYPE(CSharpGenericTypeConstraint)                                            \
   TYPE(CSharpGenericTypeConstraintColon)                                       \
   TYPE(CSharpGenericTypeConstraintComma)                                       \
+  TYPE(VerilogNumberBase) /* for the base in a number literal, not including   \
+                             the quote */                                      \
   TYPE(Unknown)
 
 /// Determines the semantic type of a syntactic token, e.g. whether "<" is a
@@ -368,6 +370,9 @@
   }
   bool isTypeFinalized() const { return TypeIsFinalized; }
 
+  /// Used to set an operator precedence explicitly.
+  prec::Level ForcedPrecedence = prec::Unknown;
+
   /// The number of newlines immediately before the \c Token.
   ///
   /// This can be used to determine what the user wrote in the original code
@@ -688,6 +693,8 @@
   }
 
   prec::Level getPrecedence() const {
+    if (ForcedPrecedence != prec::Unknown)
+      return ForcedPrecedence;
     return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
                               /*CPlusPlus11=*/true);
   }
@@ -1110,6 +1117,7 @@
     // Symbols that are treated as keywords.
     verilogHash = &IdentTable.get("#");
     verilogHashHash = &IdentTable.get("##");
+    quote = &IdentTable.get("\'");
 
     // Keep this at the end of the constructor to make sure everything here
     // is
@@ -1409,11 +1417,14 @@
   IdentifierInfo *verilogHash;
   IdentifierInfo *verilogHashHash;
 
+  // Symbols in Verilog that don't exist in C++.
+  IdentifierInfo *quote;
+
   /// Returns \c true if \p Tok is a keyword or an identifier.
   bool isWordLike(const FormatToken &Tok) const {
     // getIdentifierinfo returns non-null for keywords as well as identifiers.
     return Tok.Tok.getIdentifierInfo() != nullptr &&
-           !Tok.isOneOf(verilogHash, verilogHashHash);
+           !Tok.isOneOf(verilogHash, verilogHashHash, quote);
   }
 
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
@@ -1542,6 +1553,11 @@
     }
   }
 
+  bool isVerilogWordOperator(const FormatToken &Tok) const {
+    return Tok.isOneOf(kw_before, kw_intersect, kw_dist, kw_iff, kw_inside,
+                       kw_with);
+  }
+
   bool isVerilogIdentifier(const FormatToken &Tok) const {
     switch (Tok.Tok.getKind()) {
     case tok::kw_case:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to