[PATCH] D40832: [clang-format] Adds canonical raw string delimiters support

Krasimir Georgiev via Phabricator via cfe-commits Tue, 05 Dec 2017 05:19:55 -0800

krasimir updated this revision to Diff 125501.
krasimir added a comment.

- Update documentation



Repository:
  rC Clang

https://reviews.llvm.org/D40832

Files:
  docs/ClangFormatStyleOptions.rst
  include/clang/Format/Format.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/Format.cpp
  unittests/Format/FormatTestRawStrings.cpp

Index: unittests/Format/FormatTestRawStrings.cpp
===================================================================
--- unittests/Format/FormatTestRawStrings.cpp
+++ unittests/Format/FormatTestRawStrings.cpp
@@ -67,21 +67,24 @@
     Style.ColumnLimit = ColumnLimit;
     Style.RawStringFormats = {{/*Delimiter=*/"pb",
                                /*Kind=*/FormatStyle::LK_TextProto,
-                               /*BasedOnStyle=*/"google"}};
+                               /*BasedOnStyle=*/"google",
+                               /*Canonical=*/false}};
     return Style;
   }
 
   FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
     FormatStyle Style = getLLVMStyle();
     Style.RawStringFormats = {{/*Delimiter=*/"cpp",
-                               /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle}};
+                               /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle,
+                               /*Canonical=*/false}};
     return Style;
   }
 
   FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
     FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
     Style.RawStringFormats = {{/*Delimiter=*/"cpp",
-                               /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle}};
+                               /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle,
+                               /*Canonical=*/false}};
     return Style;
   }
 
@@ -125,9 +128,9 @@
   FormatStyle MixedStyle = getLLVMStyle();
   MixedStyle.RawStringFormats = {
       {/*Delimiter=*/"cpp", /*Kind=*/FormatStyle::LK_Cpp,
-       /*BasedOnStyle=*/"llvm"},
+       /*BasedOnStyle=*/"llvm", /*Canonical=*/false},
       {/*Delimiter=*/"CPP", /*Kind=*/FormatStyle::LK_Cpp,
-       /*BasedOnStyle=*/"google"}};
+       /*BasedOnStyle=*/"google", /*Canonical=*/false}};
 
   // Format the 'cpp' raw string with '*' on the right.
   // Format the 'CPP' raw string with '*' on the left.
@@ -210,9 +213,9 @@
 P p = TP(R"pb(item_1 <1>
               item_2: <2>
               item_3 {})pb");)test",
-      format(R"test(
+            format(R"test(
 P p = TP(R"pb(item_1<1> item_2:<2> item_3{ })pb");)test",
-          getRawStringPbStyleWithColumns(40)));
+                   getRawStringPbStyleWithColumns(40)));
 
   expect_eq(
       R"test(
@@ -515,7 +518,6 @@
             format(R"test(
 ASSERT_TRUE(ParseFromString(R"pb(item_1: 1 item_2: 2)pb"), ptr);)test",
                    getRawStringPbStyleWithColumns(40)));
-
 }
 
 TEST_F(FormatTestRawStrings, RawStringsInOperands) {
@@ -642,7 +644,6 @@
 auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2,item_3:3)pb";
 )test",
                    getRawStringPbStyleWithColumns(40)));
-
 }
 
 TEST_F(FormatTestRawStrings, PrefixAndSuffixAlignment) {
@@ -728,6 +729,54 @@
                    getRawStringPbStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) {
+  FormatStyle Style = getRawStringPbStyleWithColumns(25);
+  Style.RawStringFormats.push_back({/*Delimiter=*/"proto",
+                                    /*Kind=*/FormatStyle::LK_TextProto,
+                                    /*BasedOnStyle=*/"google",
+                                    /*Canonical=*/true});
+  expect_eq(R"test(a = R"proto(key: value)proto";)test",
+            format(R"test(a = R"pb(key:value)pb";)test", Style));
+  expect_eq(R"test(a = R"proto(key: value)proto";)test",
+            format(R"test(a = R"proto(key:value)proto";)test", Style));
+  expect_eq(R"test(
+f(a,
+  R"proto(item {
+            key: value
+            key: value
+          })proto");
+)test",
+            format(R"test(
+f(a,
+  R"pb(item {
+         key: value
+         key: value
+       })pb");
+)test",
+                   Style));
+  // When several delimiters are marked canonical for a language, the last one
+  // in RawStringFormats is the canonical delimiter for that language.
+  Style.RawStringFormats.push_back({/*Delimiter=*/"cproto",
+                                    /*Kind=*/FormatStyle::LK_TextProto,
+                                    /*BasedOnStyle=*/"google",
+                                    /*Canonical=*/true});
+  expect_eq(R"test(
+f(a,
+  R"cproto(item {
+             key: value
+             key: value
+           })cproto");
+)test",
+            format(R"test(
+f(a,
+  R"proto(item {
+         key: value
+         key: value
+       })proto");
+)test",
+                   Style));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -641,7 +641,8 @@
   LLVMStyle.SpacesBeforeTrailingComments = 1;
   LLVMStyle.Standard = FormatStyle::LS_Cpp11;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
-  LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}};
+  LLVMStyle.RawStringFormats = {
+      {"pb", FormatStyle::LK_TextProto, "google", /*Canonical=*/false}};
   LLVMStyle.ReflowComments = true;
   LLVMStyle.SpacesInParentheses = false;
   LLVMStyle.SpacesInSquareBrackets = false;
@@ -697,6 +698,12 @@
   GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
   GoogleStyle.SpacesBeforeTrailingComments = 2;
   GoogleStyle.Standard = FormatStyle::LS_Auto;
+  GoogleStyle.RawStringFormats = {
+      {"pb", FormatStyle::LK_TextProto, "google", /*Canonical=*/false},
+      {"PB", FormatStyle::LK_TextProto, "google", /*Canonical=*/false},
+      {"PROTO", FormatStyle::LK_TextProto, "google", /*Canonical=*/false},
+      {"proto", FormatStyle::LK_TextProto, "google", /*Canonical=*/true},
+  };
 
   GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
   GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -102,6 +102,16 @@
   return Delimiter;
 }
 
+static llvm::Optional<StringRef>
+getCanonicalRawStringDelimiter(const FormatStyle &Style,
+                               FormatStyle::LanguageKind Language) {
+  for (const auto &Format : llvm::reverse(Style.RawStringFormats)) {
+    if (Format.Language == Language && Format.Canonical)
+      return StringRef(Format.Delimiter);
+  }
+  return None;
+}
+
 RawStringFormatStyleManager::RawStringFormatStyleManager(
     const FormatStyle &CodeStyle) {
   for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
@@ -711,8 +721,8 @@
       // about removing empty lines on closing blocks. Special case them here.
       MaxEmptyLinesToKeep = 1;
     }
-    unsigned Newlines = std::max(
-        1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
+    unsigned Newlines =
+        std::max(1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
     bool ContinuePPDirective =
         State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
     Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
@@ -1034,8 +1044,7 @@
   State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
 
-  unsigned Penalty =
-      handleEndOfLine(Current, State, DryRun, AllowBreak);
+  unsigned Penalty = handleEndOfLine(Current, State, DryRun, AllowBreak);
 
   if (Current.Role)
     Current.Role->formatFromToken(State, this, DryRun);
@@ -1291,14 +1300,25 @@
     const FormatToken &Current, LineState &State,
     const FormatStyle &RawStringStyle, bool DryRun) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  auto Delimiter = *getRawStringDelimiter(Current.TokenText);
+  StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
+  auto CanonicalDelimiter =
+      getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
+  StringRef NewDelimiter =
+      CanonicalDelimiter ? *CanonicalDelimiter : OldDelimiter;
+  bool UpdateDelimiter =
+      CanonicalDelimiter && *CanonicalDelimiter != OldDelimiter;
+
   // The text of a raw string is between the leading 'R"delimiter(' and the
   // trailing 'delimiter)"'.
-  unsigned PrefixSize = 3 + Delimiter.size();
-  unsigned SuffixSize = 2 + Delimiter.size();
+  unsigned OldPrefixSize = 3 + OldDelimiter.size();
+  unsigned NewPrefixSize =
+      UpdateDelimiter ? (3 + CanonicalDelimiter->size()) : OldPrefixSize;
+  unsigned OldSuffixSize = 2 + OldDelimiter.size();
+  unsigned NewSuffixSize =
+      UpdateDelimiter ? (2 + CanonicalDelimiter->size()) : OldSuffixSize;
 
-  // The first start column is the column the raw text starts.
-  unsigned FirstStartColumn = StartColumn + PrefixSize;
+  // The first start column is where the raw text starts after formatting.
+  unsigned FirstStartColumn = StartColumn + NewPrefixSize;
 
   // The next start column is the intended indentation a line break inside
   // the raw string at level 0. It is determined by the following rules:
@@ -1309,7 +1329,7 @@
   // These rules have the advantage that the formatted content both does not
   // violate the rectangle rule and visually flows within the surrounding
   // source.
-  bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
+  bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
   unsigned NextStartColumn = ContentStartsOnNewline
                                  ? State.Stack.back().Indent + Style.IndentWidth
                                  : FirstStartColumn;
@@ -1323,11 +1343,11 @@
   //   - if the raw string prefix does not start on a newline, it is the current
   //     indent.
   unsigned LastStartColumn = Current.NewlinesBefore
-                                 ? FirstStartColumn - PrefixSize
+                                 ? FirstStartColumn - NewPrefixSize
                                  : State.Stack.back().Indent;
 
   std::string RawText =
-      Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
+      Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize);
 
   std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
       RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
@@ -1341,8 +1361,33 @@
     return 0;
   }
   if (!DryRun) {
+    if (UpdateDelimiter) {
+      // In 'R"delimiter(...', the delimiter starts 2 characters after the start
+      // of the token.
+      SourceLocation PrefixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(2);
+      auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (PrefixErr) {
+        llvm::errs()
+            << "Failed to update the prefix delimiter of a raw string: "
+            << llvm::toString(std::move(PrefixErr)) << "\n";
+      }
+      // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
+      // position length - 1 - |delimiter|.
+      SourceLocation SuffixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
+                                                     1 - OldDelimiter.size());
+      auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (SuffixErr) {
+        llvm::errs()
+            << "Failed to update the suffix delimiter of a raw string: "
+            << llvm::toString(std::move(SuffixErr)) << "\n";
+      }
+    }
     SourceLocation OriginLoc =
-        Current.Tok.getLocation().getLocWithOffset(PrefixSize);
+        Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
     for (const tooling::Replacement &Fix : Fixes.first) {
       auto Err = Whitespaces.addReplacement(tooling::Replacement(
           SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
@@ -1355,7 +1400,7 @@
   }
   unsigned RawLastLineEndColumn = getLastLineEndColumn(
       *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
-  State.Column = RawLastLineEndColumn + SuffixSize;
+  State.Column = RawLastLineEndColumn + NewSuffixSize;
   return Fixes.second;
 }
 
@@ -1443,16 +1488,16 @@
   return RawStringStyle;
 }
 
-std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
-    const FormatToken &Current, LineState &State, bool AllowBreak) {
+std::unique_ptr<BreakableToken>
+ContinuationIndenter::createBreakableToken(const FormatToken &Current,
+                                           LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   if (Current.isStringLiteral()) {
     // FIXME: String literal breaking is currently disabled for Java and JS, as
     // it requires strings to be merged using "+" which we don't support.
     if (Style.Language == FormatStyle::LK_Java ||
         Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals ||
-        !AllowBreak)
+        !Style.BreakStringLiterals || !AllowBreak)
       return nullptr;
 
     // Don't break string literals inside preprocessor directives (except for
@@ -1638,7 +1683,8 @@
             Token->getLengthAfterCompression(ToNextSplitColumns, Split);
         DEBUG(llvm::dbgs() << "    ContentStartColumn: " << ContentStartColumn
                            << "\n");
-        DEBUG(llvm::dbgs() << "    ToNextSplit: " << ToNextSplitColumns << "\n");
+        DEBUG(llvm::dbgs() << "    ToNextSplit: " << ToNextSplitColumns
+                           << "\n");
         // If the whitespace compression makes us fit, continue on the current
         // line.
         bool ContinueOnLine =
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -1371,9 +1371,16 @@
     /// If not specified, the raw string format is based on the style that this
     /// format is based on.
     std::string BasedOnStyle;
+    /// \brief If true, this delimiter is canonical for this language.
+    /// The canonical delimiter for a given language is the last delimiter
+    /// declared canonical in the sequence of raw string formats, if it exists.
+    /// If a canonical raw string format exists for a language, instances of
+    /// other raw string delimiters for that language will have their delimiter
+    /// updated to the canonical one.
+    bool Canonical;
     bool operator==(const RawStringFormat &Other) const {
       return Delimiter == Other.Delimiter && Language == Other.Language &&
-             BasedOnStyle == Other.BasedOnStyle;
+             BasedOnStyle == Other.BasedOnStyle && Canonical == Other.Canonical;
     }
   };
 
@@ -1383,16 +1390,20 @@
   /// A raw string with a matching delimiter will be reformatted assuming the
   /// specified language based on a predefined style given by 'BasedOnStyle'.
   /// If 'BasedOnStyle' is not found, the formatting is based on llvm style.
+  /// If it exists, the last canonical delimiter for a given language will be
+  /// used to update other delimiters for that language.
   ///
   /// To configure this in the .clang-format file, use:
   /// \code{.yaml}
   ///   RawStringFormats:
   ///     - Delimiter: 'pb'
   ///       Language:  TextProto
   ///       BasedOnStyle: llvm
+  ///       Canonical: false
   ///     - Delimiter: 'proto'
   ///       Language:  TextProto
   ///       BasedOnStyle: google
+  ///       Canonical: true
   /// \endcode
   std::vector<RawStringFormat> RawStringFormats;
 
Index: docs/ClangFormatStyleOptions.rst
===================================================================
--- docs/ClangFormatStyleOptions.rst
+++ docs/ClangFormatStyleOptions.rst
@@ -994,28 +994,28 @@
 
     .. code-block:: c++
 
-      Constructor()
-          : initializer1(),
-            initializer2()
+    Constructor()
+        : initializer1(),
+          initializer2()
 
   * ``BCIS_BeforeComma`` (in configuration: ``BeforeComma``)
     Break constructor initializers before the colon and commas, and align
     the commas with the colon.
 
     .. code-block:: c++
 
-      Constructor()
-          : initializer1()
-          , initializer2()
+    Constructor()
+        : initializer1()
+        , initializer2()
 
   * ``BCIS_AfterColon`` (in configuration: ``AfterColon``)
     Break constructor initializers after the colon and commas.
 
     .. code-block:: c++
 
-      Constructor() :
-          initializer1(),
-          initializer2()
+    Constructor() :
+        initializer1(),
+        initializer2()
 
 
 
@@ -1201,7 +1201,8 @@
 
   * ``IBS_Regroup`` (in configuration: ``Regroup``)
     Merge multiple ``#include`` blocks together and sort as one.
-    Then split into groups based on category priority. See ``IncludeCategories``.
+    Then split into groups based on category priority. See
+    ``IncludeCategories``.
 
     .. code-block:: c++
 
@@ -1583,6 +1584,8 @@
   A raw string with a matching delimiter will be reformatted assuming the
   specified language based on a predefined style given by 'BasedOnStyle'.
   If 'BasedOnStyle' is not found, the formatting is based on llvm style.
+  If it exists, the last canonical delimiter for a given language will be
+  used to update other delimiters for that language.
 
   To configure this in the .clang-format file, use:
 
@@ -1592,9 +1595,11 @@
       - Delimiter: 'pb'
         Language:  TextProto
         BasedOnStyle: llvm
+        Canonical: false
       - Delimiter: 'proto'
         Language:  TextProto
         BasedOnStyle: google
+        Canonical: true
 
 **ReflowComments** (``bool``)
   If ``true``, clang-format will attempt to re-flow comments.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D40832: [clang-format] Adds canonical raw string delimiters support

Reply via email to