llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-format

Author: Shivram (shivrm)

<details>
<summary>Changes</summary>

String literals with user-defined suffixes can now be split across lines.
 - Uses a regex to identify user-defined suffixes
 - The suffix must be placed only on the last line, so I added 
`ContinuationPrefix` and `ContinuationPostfix` members to 
`BreakableStringLiteral`, so that the last line can use a different postfix 
from all the other lines
 - `ContinuationPrefix` is currently always identical to `Prefix` - prefixes 
are still placed on every line when splitting. I've kept it for completeness.
 - Adds a new unit test for splitting strings with user-defined suffixes.

Fixes #<!-- -->165617

---
Full diff: https://github.com/llvm/llvm-project/pull/167150.diff


4 Files Affected:

- (modified) clang/lib/Format/BreakableToken.cpp (+16-4) 
- (modified) clang/lib/Format/BreakableToken.h (+10-2) 
- (modified) clang/lib/Format/ContinuationIndenter.cpp (+34-10) 
- (modified) clang/unittests/Format/FormatTest.cpp (+4) 


``````````diff
diff --git a/clang/lib/Format/BreakableToken.cpp 
b/clang/lib/Format/BreakableToken.cpp
index 994a427517ffc..dd9d4ecb2f3c7 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -253,10 +253,13 @@ unsigned 
BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
 
 BreakableStringLiteral::BreakableStringLiteral(
     const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
-    encoding::Encoding Encoding, const FormatStyle &Style)
+    StringRef Postfix, StringRef ContinuationPrefix,
+    StringRef ContinuationPostfix, unsigned UnbreakableTailLength,
+    bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
     : BreakableToken(Tok, InPPDirective, Encoding, Style),
       StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+      ContinuationPrefix(ContinuationPrefix),
+      ContinuationPostfix(ContinuationPostfix),
       UnbreakableTailLength(UnbreakableTailLength) {
   assert(Tok.TokenText.starts_with(Prefix) && 
Tok.TokenText.ends_with(Postfix));
   Line = Tok.TokenText.substr(
@@ -274,9 +277,14 @@ void BreakableStringLiteral::insertBreak(unsigned 
LineIndex,
                                          unsigned TailOffset, Split Split,
                                          unsigned ContentIndent,
                                          WhitespaceManager &Whitespaces) const 
{
+
+  const unsigned SplitEnd = TailOffset + Split.first + Split.second;
+  const bool IsLastFragment = SplitEnd > Line.size() - UnbreakableTailLength;
+  StringRef LocalPostfix = (IsLastFragment) ? Postfix : ContinuationPostfix;
+
   Whitespaces.replaceWhitespaceInToken(
-      Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
-      Prefix, InPPDirective, 1, StartColumn);
+      Tok, ContinuationPrefix.size() + TailOffset + Split.first, Split.second,
+      LocalPostfix, ContinuationPrefix, InPPDirective, 1, StartColumn);
 }
 
 BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
@@ -288,6 +296,10 @@ 
BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
                             : QuoteStyle == AtDoubleQuotes        ? "@\""
                                                                   : "\"",
           /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
+          /*ContinuationPrefix=*/QuoteStyle == SingleQuotes ? "'"
+          : QuoteStyle == AtDoubleQuotes                    ? "@\""
+                                                            : "\"",
+          /*ContinuationPostfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
           UnbreakableTailLength, InPPDirective, Encoding, Style),
       BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
       QuoteStyle(QuoteStyle) {
diff --git a/clang/lib/Format/BreakableToken.h 
b/clang/lib/Format/BreakableToken.h
index 45c00b35fd01e..2ee37d3e0e059 100644
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -252,6 +252,8 @@ class BreakableStringLiteral : public BreakableToken {
   /// after formatting.
   BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
                          StringRef Prefix, StringRef Postfix,
+                         StringRef ContinuationPrefix,
+                         StringRef ContinuationPostfix,
                          unsigned UnbreakableTailLength, bool InPPDirective,
                          encoding::Encoding Encoding, const FormatStyle 
&Style);
 
@@ -274,15 +276,21 @@ class BreakableStringLiteral : public BreakableToken {
 protected:
   // The column in which the token starts.
   unsigned StartColumn;
-  // The prefix a line needs after a break in the token.
+  // The prefix a line needs at the start
   StringRef Prefix;
-  // The postfix a line needs before introducing a break.
+  // The postfix a line needs at the end
   StringRef Postfix;
+  // The prefix every line except the first line needs
+  StringRef ContinuationPrefix;
+  // The postfix every line except the last line needs
+  StringRef ContinuationPostfix;
   // The token text excluding the prefix and postfix.
   StringRef Line;
   // Length of the sequence of tokens after this string literal that cannot
   // contain line breaks.
   unsigned UnbreakableTailLength;
+  // Whether the string prefix and postfix should be repeated on each line
+  // when breaking the string.
 };
 
 class BreakableStringLiteralUsingOperators : public BreakableStringLiteral {
diff --git a/clang/lib/Format/ContinuationIndenter.cpp 
b/clang/lib/Format/ContinuationIndenter.cpp
index 9ab024a03fbd7..5badd6edf4a7b 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -2540,22 +2540,46 @@ ContinuationIndenter::createBreakableToken(const 
FormatToken &Current,
 
     StringRef Prefix;
     StringRef Postfix;
+
     // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
     // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
     // reduce the overhead) for each FormatToken, which is a string, so that we
     // don't run multiple checks here on the hot path.
-    if ((Text.ends_with(Postfix = "\"") &&
-         (Text.starts_with(Prefix = "@\"") || Text.starts_with(Prefix = "\"") 
||
-          Text.starts_with(Prefix = "u\"") ||
-          Text.starts_with(Prefix = "U\"") ||
-          Text.starts_with(Prefix = "u8\"") ||
-          Text.starts_with(Prefix = "L\""))) ||
-        (Text.starts_with(Prefix = "_T(\"") &&
-         Text.ends_with(Postfix = "\")"))) {
+    if (Text.starts_with(Prefix = "_T(\"") && Text.ends_with(Postfix = "\")")) 
{
+      // We need to put `_T("` and `")` on each line because it is a macro
+      llvm::StringRef ContinuationPrefix = Prefix;
+      llvm::StringRef ContinuationPostfix = Postfix;
+
       return std::make_unique<BreakableStringLiteral>(
-          Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
-          State.Line->InPPDirective, Encoding, Style);
+          Current, StartColumn, Prefix, Postfix, ContinuationPrefix,
+          ContinuationPostfix, UnbreakableTailLength, 
State.Line->InPPDirective,
+          Encoding, Style);
+    }
+
+    static const auto PostfixRegex =
+        llvm::Regex(R"("(_[a-zA-Z_][a-zA-Z0-9_]*)?$)");
+    llvm::SmallVector<llvm::StringRef, 1> Matches;
+
+    if (PostfixRegex.match(Text, &Matches)) {
+      Postfix = Matches.front();
+
+      if ((Text.starts_with(Prefix = "@\"") ||
+           Text.starts_with(Prefix = "\"") ||
+           Text.starts_with(Prefix = "u\"") ||
+           Text.starts_with(Prefix = "U\"") ||
+           Text.starts_with(Prefix = "u8\"") ||
+           Text.starts_with(Prefix = "L\""))) {
+
+        // Repeat the prefix on every line but don't repeat the suffix
+        llvm::StringRef ContinuationPrefix = Prefix;
+        llvm::StringRef ContinuationPostfix = "\"";
+        return std::make_unique<BreakableStringLiteral>(
+            Current, StartColumn, Prefix, Postfix, ContinuationPrefix,
+            ContinuationPostfix, UnbreakableTailLength,
+            State.Line->InPPDirective, Encoding, Style);
+      }
     }
+
   } else if (Current.is(TT_BlockComment)) {
     if (Style.ReflowComments == FormatStyle::RCS_Never ||
         // If a comment token switches formatting, like
diff --git a/clang/unittests/Format/FormatTest.cpp 
b/clang/unittests/Format/FormatTest.cpp
index 24235b966399d..4c7593b88202f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -15838,6 +15838,10 @@ TEST_F(FormatTest, BreaksWideAndNSStringLiterals) {
                "@\"NSString literal\";", getGoogleStyleWithColumns(19));
   verifyFormat(R"(NSString *s = @"那那那那";)", getLLVMStyleWithColumns(26));
 
+  EXPECT_EQ("L\"suffixed \"\n"
+            "L\"string\"_s;",
+            format("L\"suffixed string\"_s;", getLLVMStyleWithColumns(19)));
+
   // This input makes clang-format try to split the incomplete unicode escape
   // sequence, which used to lead to a crasher.
   verifyNoCrash(

``````````

</details>


https://github.com/llvm/llvm-project/pull/167150
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to