owenpan created this revision.
owenpan added reviewers: MyDeveloperDay, HazardyKnusperkeks, rymiel.
owenpan added a project: clang-format.
Herald added a project: All.
owenpan requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Closes https://github.com/llvm/llvm-project/issues/58949.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140543

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/CMakeLists.txt
  clang/lib/Format/Format.cpp
  clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
  clang/lib/Format/IntegerLiteralSeparatorFixer.h

Index: clang/lib/Format/IntegerLiteralSeparatorFixer.h
===================================================================
--- /dev/null
+++ clang/lib/Format/IntegerLiteralSeparatorFixer.h
@@ -0,0 +1,50 @@
+//===--- IntegerLiteralSeparatorFixer.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares IntegerLiteralSeparatorFixer that fixes C++ integer
+/// literal separators.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+/// Adds, regroups or removes the digit separators of C++ integer literals as
+/// configured by \c FormatStyle::IntegerLiteralSeparator.
+class IntegerLiteralSeparatorFixer {
+public:
+  /// Returns the replacements that fix the integer literal separators in the
+  /// file of \p Env according to \p Style, paired with 0.
+  std::pair<tooling::Replacements, unsigned>
+  process(const Environment &Env, const FormatStyle &Style) const;
+
+private:
+  /// The digit separator. constexpr makes the member implicitly inline, so
+  /// that it needs no out-of-class definition even if it is odr-used.
+  static constexpr auto Separator = '\'';
+
+  /// Checks whether \p IntegerLiteral already has its separators exactly
+  /// where \p DigitsPerGroup, which must be positive, requires them.
+  bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const;
+  /// Returns \p IntegerLiteral with its separators removed if
+  /// \p RemoveSeparator is true, or else with its digits regrouped into
+  /// groups of \p DigitsPerGroup, counted from the right.
+  std::string format(const StringRef IntegerLiteral, int DigitsPerGroup,
+                     bool RemoveSeparator) const;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
Index: clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
===================================================================
--- /dev/null
+++ clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -0,0 +1,219 @@
+//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
+/// literal separators.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IntegerLiteralSeparatorFixer.h"
+
+namespace clang {
+namespace format {
+
+/// Numeric base of an integer literal as indicated by its prefix. Other is
+/// anything that is neither binary, decimal nor hexadecimal, e.g. octal.
+enum class Base { Binary, Decimal, Hex, Other };
+
+/// Classifies \p IntegerLiteral by looking at its first two characters.
+///
+/// \pre \p IntegerLiteral has at least two characters and starts with a digit.
+static Base getBase(const StringRef IntegerLiteral) {
+  assert(IntegerLiteral.size() > 1);
+
+  if (IntegerLiteral[0] > '0') {
+    assert(IntegerLiteral[0] <= '9');
+    return Base::Decimal;
+  }
+
+  assert(IntegerLiteral[0] == '0');
+
+  switch (IntegerLiteral[1]) {
+  case 'b':
+  case 'B':
+    return Base::Binary;
+  case 'x':
+  case 'X':
+    return Base::Hex;
+  default:
+    return Base::Other;
+  }
+}
+
+/// Lexes the file in \p Env and returns replacements that make the separators
+/// of its binary, decimal and hexadecimal integer literals conform to
+/// \c Style.IntegerLiteralSeparator. Literals between "clang-format off" and
+/// "clang-format on" comments are left alone. The second member of the result
+/// is always 0.
+std::pair<tooling::Replacements, unsigned>
+IntegerLiteralSeparatorFixer::process(const Environment &Env,
+                                      const FormatStyle &Style) const {
+  const auto &Option = Style.IntegerLiteralSeparator;
+  const auto Binary = Option.Binary;
+  const auto Decimal = Option.Decimal;
+  const auto Hex = Option.Hex;
+  const bool SkipBinary = Binary == 0;
+  const bool SkipDecimal = Decimal == 0;
+  const bool SkipHex = Hex == 0;
+
+  if (SkipBinary && SkipDecimal && SkipHex)
+    return {};
+
+  const auto ID = Env.getFileID();
+  const auto &SourceMgr = Env.getSourceManager();
+  // Keep the language options in a named local so that they outlive the lexer
+  // in case it only stores a reference to them.
+  const auto LangOpts = getFormattingLangOpts(Style);
+  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+  Lex.SetCommentRetentionState(true);
+
+  Token Tok;
+  Lex.LexFromRawLexer(Tok);
+
+  tooling::Replacements Result;
+  for (bool Skip = false; Tok.isNot(tok::eof); Lex.LexFromRawLexer(Tok)) {
+    auto Length = Tok.getLength();
+    if (Length < 2)
+      continue;
+    auto Location = Tok.getLocation();
+    auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+    if (Tok.is(tok::comment)) {
+      if (Text == "// clang-format off" || Text == "/* clang-format off */")
+        Skip = true;
+      if (Text == "// clang-format on" || Text == "/* clang-format on */")
+        Skip = false;
+      continue;
+    }
+    if (Skip || Tok.isNot(tok::numeric_constant))
+      continue;
+    // A '.' always makes the literal a floating-point one. Test for it before
+    // calling getBase(), which requires a leading digit that ".5" lacks.
+    if (Text.find('.') != StringRef::npos)
+      continue;
+    const auto B = getBase(Text);
+    const bool IsBase2 = B == Base::Binary;
+    const bool IsBase10 = B == Base::Decimal;
+    const bool IsBase16 = B == Base::Hex;
+    if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
+        (IsBase16 && SkipHex) || B == Base::Other) {
+      continue;
+    }
+    // Skip floating-point literals that have an exponent but no '.'. In a
+    // hexadecimal literal 'e' and 'E' are digits (e.g. 0xDEADBEEF), so only
+    // 'p' and 'P' can start an exponent there.
+    if (Text.find_first_of(IsBase16 ? "pP" : "eEpP") != StringRef::npos)
+      continue;
+    auto DigitsPerGroup = Decimal;
+    if (IsBase2)
+      DigitsPerGroup = Binary;
+    else if (IsBase16)
+      DigitsPerGroup = Hex;
+    // Only -1 is documented, but treat every negative value as a request to
+    // remove the separators rather than feeding it to the grouping logic.
+    const bool RemoveSeparator = DigitsPerGroup < 0;
+    if (RemoveSeparator && Text.find(Separator) == StringRef::npos)
+      continue;
+    // Narrow Text down to the digits and separators: drop the two-character
+    // base prefix and whatever follows the last digit, be it a standard
+    // suffix (e.g. "ull") or a user-defined one (e.g. "ms").
+    const auto Start = Text[0] == '0' ? 2 : 0;
+    const StringRef Digits = IsBase2    ? "01'"
+                             : IsBase16 ? "0123456789abcdefABCDEF'"
+                                        : "0123456789'";
+    auto End = Text.find_first_not_of(Digits, Start);
+    if (End == StringRef::npos)
+      End = Text.size();
+    Length = End - Start;
+    Text = Text.substr(Start, Length);
+    // checkSeparator() requires a positive group size, so it must not be
+    // called when the separators are to be removed.
+    if (!RemoveSeparator && checkSeparator(Text, DigitsPerGroup))
+      continue;
+    if (Start > 0)
+      Location = Location.getLocWithOffset(Start);
+    cantFail(Result.add(
+        tooling::Replacement(SourceMgr, Location, Length,
+                             format(Text, DigitsPerGroup, RemoveSeparator))));
+  }
+
+  return {Result, 0};
+}
+
+/// Returns true if \p IntegerLiteral, which must consist of digits and
+/// separators only, has a separator between every two adjacent groups of
+/// \p DigitsPerGroup digits counted from the right, and nowhere else.
+bool IntegerLiteralSeparatorFixer::checkSeparator(
+    const StringRef IntegerLiteral, int DigitsPerGroup) const {
+  assert(DigitsPerGroup > 0);
+
+  // Number of digits seen since the last separator or the right end.
+  int I = 0;
+  for (auto C : llvm::reverse(IntegerLiteral)) {
+    if (C == Separator) {
+      // A separator to the left of an incomplete group is misplaced.
+      if (I < DigitsPerGroup)
+        return false;
+      I = 0;
+    } else {
+      // A digit directly to the left of a complete group means a separator
+      // is missing. Test this before counting the digit, or else every
+      // literal with a complete group would be reported as badly formatted.
+      if (I == DigitsPerGroup)
+        return false;
+      ++I;
+    }
+  }
+
+  return true;
+}
+
+/// Returns the digits of \p IntegerLiteral without any separators if
+/// \p RemoveSeparator is true. Otherwise returns them split into groups of
+/// \p DigitsPerGroup digits, counted from the right, with a separator between
+/// adjacent groups. Existing separators are discarded in either case.
+std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
+                                                 int DigitsPerGroup,
+                                                 bool RemoveSeparator) const {
+  assert(RemoveSeparator || DigitsPerGroup > 0);
+
+  std::string Formatted;
+
+  if (RemoveSeparator) {
+    for (auto C : IntegerLiteral)
+      if (C != Separator)
+        Formatted.push_back(C);
+    return Formatted;
+  }
+
+  int DigitCount = 0;
+  for (auto C : IntegerLiteral)
+    if (C != Separator)
+      ++DigitCount;
+
+  // Size of the leftmost group if it is incomplete, or else 0.
+  int Remainder = DigitCount % DigitsPerGroup;
+
+  int I = 0;
+  for (auto C : IntegerLiteral) {
+    if (C == Separator)
+      continue;
+    if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
+      Formatted.push_back(Separator);
+      I = 0;
+      Remainder = 0;
+    }
+    Formatted.push_back(C);
+    ++I;
+  }
+
+  return Formatted;
+}
+
+} // namespace format
+} // namespace clang
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -20,6 +20,7 @@
 #include "FormatInternal.h"
 #include "FormatToken.h"
 #include "FormatTokenLexer.h"
+#include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "QualifierAlignmentFixer.h"
 #include "SortJavaScriptImports.h"
@@ -335,6 +336,14 @@
   }
 };
 
+template <> struct MappingTraits<FormatStyle::IntegerLiteralSeparatorStyle> {
+  static void mapping(IO &IO, FormatStyle::IntegerLiteralSeparatorStyle &Base) {
+    IO.mapOptional("Binary", Base.Binary);   // Setting for 0b/0B literals.
+    IO.mapOptional("Decimal", Base.Decimal); // Setting for decimal literals.
+    IO.mapOptional("Hex", Base.Hex);         // Setting for 0x/0X literals.
+  }
+};
+
 template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
   static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
     IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
@@ -881,6 +890,7 @@
                    Style.IndentWrappedFunctionNames);
     IO.mapOptional("InsertBraces", Style.InsertBraces);
     IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas);
+    IO.mapOptional("IntegerLiteralSeparator", Style.IntegerLiteralSeparator);
     IO.mapOptional("JavaImportGroups", Style.JavaImportGroups);
     IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
     IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
@@ -1335,6 +1345,7 @@
   LLVMStyle.IndentWrappedFunctionNames = false;
   LLVMStyle.InsertBraces = false;
   LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None;
+  LLVMStyle.IntegerLiteralSeparator = {/*Binary=*/0, /*Decimal=*/0, /*Hex=*/0};
   LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
   LLVMStyle.JavaScriptWrapImports = true;
   LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
@@ -3392,6 +3403,10 @@
   SmallVector<AnalyzerPass, 8> Passes;
 
   if (Style.isCpp()) {
+    Passes.emplace_back([&](const Environment &Env) {
+      return IntegerLiteralSeparatorFixer().process(Env, Expanded);
+    });
+
     if (Style.QualifierAlignment != FormatStyle::QAS_Leave) {
       Passes.emplace_back([&](const Environment &Env) {
         return QualifierAlignmentFixer(Env, Expanded, Code, Ranges,
Index: clang/lib/Format/CMakeLists.txt
===================================================================
--- clang/lib/Format/CMakeLists.txt
+++ clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
   NamespaceEndCommentsFixer.cpp
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -2388,6 +2388,36 @@
   /// \version 3.7
   bool IndentWrappedFunctionNames;
 
+  /// Separator format of C++ integer literals of different bases.
+  /// -1: Remove separators.
+  ///  0: Leave the literal as is.
+  /// >0: Insert separators between digits, starting from the rightmost digit.
+  struct IntegerLiteralSeparatorStyle {
+    /// \code
+    ///    -1: 0b100111101101
+    ///     0: 0b10011'11'0110'1
+    ///     3: 0b100'111'101'101
+    ///     4: 0b1001'1110'1101
+    /// \endcode
+    int8_t Binary;
+    /// \code
+    ///    -1: 18446744073709550592ull
+    ///     0: 184467'440737'0'95505'92ull
+    ///     3: 18'446'744'073'709'550'592ull
+    /// \endcode
+    int8_t Decimal;
+    /// \code
+    ///    -1: 0xDEADBEEFDEADBEEFull
+    ///     0: 0xDEAD'BEEF'DE'AD'BEE'Full
+    ///     2: 0xDE'AD'BE'EF'DE'AD'BE'EFull
+    /// \endcode
+    int8_t Hex;
+  };
+
+  /// Format C++ integer literal separators.
+  /// \version 16
+  IntegerLiteralSeparatorStyle IntegerLiteralSeparator;
+
   /// Insert braces after control statements (``if``, ``else``, ``for``, ``do``,
   /// and ``while``) in C++ unless the control statements are inside macro
   /// definitions or the braces would enclose preprocessor directives.
@@ -4088,6 +4118,10 @@
            IndentRequiresClause == R.IndentRequiresClause &&
            IndentWidth == R.IndentWidth &&
            IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
+           IntegerLiteralSeparator.Binary == R.IntegerLiteralSeparator.Binary &&
+           IntegerLiteralSeparator.Decimal ==
+               R.IntegerLiteralSeparator.Decimal &&
+           IntegerLiteralSeparator.Hex == R.IntegerLiteralSeparator.Hex &&
            InsertBraces == R.InsertBraces &&
            JavaImportGroups == R.JavaImportGroups &&
            JavaScriptQuotes == R.JavaScriptQuotes &&
Index: clang/docs/ClangFormatStyleOptions.rst
===================================================================
--- clang/docs/ClangFormatStyleOptions.rst
+++ clang/docs/ClangFormatStyleOptions.rst
@@ -3159,6 +3159,36 @@
 
 
 
+**IntegerLiteralSeparator** (``IntegerLiteralSeparatorStyle``) :versionbadge:`clang-format 16`
+  Format C++ integer literal separators.
+
+  Nested configuration flags:
+
+  Separator format of C++ integer literals of different bases.
+  -1: Remove separators.
+   0: Leave the literal as is.
+  >0: Insert separators between digits, starting from the rightmost digit.
+
+  * ``int8_t Binary`` .. code-block:: c++
+
+       -1: 0b100111101101
+        0: 0b10011'11'0110'1
+        3: 0b100'111'101'101
+        4: 0b1001'1110'1101
+
+  * ``int8_t Decimal`` .. code-block:: c++
+
+       -1: 18446744073709550592ull
+        0: 184467'440737'0'95505'92ull
+        3: 18'446'744'073'709'550'592ull
+
+  * ``int8_t Hex`` .. code-block:: c++
+
+       -1: 0xDEADBEEFDEADBEEFull
+        0: 0xDEAD'BEEF'DE'AD'BEE'Full
+        2: 0xDE'AD'BE'EF'DE'AD'BE'EFull
+
+
 **JavaImportGroups** (``List of Strings``) :versionbadge:`clang-format 8`
   A vector of prefixes ordered by the desired groups for Java imports.
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to