[PATCH] D29450: [clang-format] Don't reflow across comment pragmas.

Krasimir Georgiev via Phabricator via cfe-commits Thu, 02 Feb 2017 07:36:45 -0800

krasimir updated this revision to Diff 86813.
krasimir edited the summary of this revision.
krasimir added a comment.


- Break line comment sections at comment pragmas, so that a pragma line starts a new section and is never reflowed with the preceding comment lines.


https://reviews.llvm.org/D29450

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  lib/Format/ContinuationIndenter.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/Format/UnwrappedLineParser.h
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -2331,6 +2331,22 @@
                    "// XXX: long",
                    getLLVMStyleWithColumns(20)));
 
+  // Don't reflow comment pragmas.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// IWYU pragma:",
+            format("// long long long long\n"
+                   "// IWYU pragma:",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long\n"
+            " * IWYU pragma:\n"
+            " */",
+            format("/* long long long long\n"
+                   " * IWYU pragma:\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
   // Reflow lines that have a non-punctuation character among their first 2
   // characters.
   EXPECT_EQ("// long long long\n"
Index: lib/Format/UnwrappedLineParser.h
===================================================================
--- lib/Format/UnwrappedLineParser.h
+++ lib/Format/UnwrappedLineParser.h
@@ -19,6 +19,7 @@
 #include "FormatToken.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
 #include <list>
 #include <stack>
 
@@ -161,6 +162,8 @@
 
   const FormatStyle &Style;
   const AdditionalKeywords &Keywords;
+  
+  llvm::Regex CommentPragmasRegex;
 
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;
Index: lib/Format/UnwrappedLineParser.cpp
===================================================================
--- lib/Format/UnwrappedLineParser.cpp
+++ lib/Format/UnwrappedLineParser.cpp
@@ -202,7 +202,8 @@
                                          ArrayRef<FormatToken *> Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
-      CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 
 void UnwrappedLineParser::reset() {
@@ -2048,10 +2049,18 @@
 // Checks if \p FormatTok is a line comment that continues the line comment
 // section on \p Line.
 static bool continuesLineComment(const FormatToken &FormatTok,
-                                 const UnwrappedLine &Line) {
+                                 const UnwrappedLine &Line,
+                                 llvm::Regex &CommentPragmasRegex) {
   if (Line.Tokens.empty())
     return false;
 
+  StringRef IndentContent = FormatTok.TokenText;
+  if (FormatTok.TokenText.startswith("//") ||
+      FormatTok.TokenText.startswith("/*"))
+    IndentContent = FormatTok.TokenText.substr(2);
+  if (CommentPragmasRegex.match(IndentContent))
+    return false;
+
   // If Line starts with a line comment, then FormatTok continues the comment
   // section if its original column is greater or equal to the original start
   // column of the line.
@@ -2066,31 +2075,58 @@
   // original start column of the min column token of the line.
   //
   // For example, the second line comment continues the first in these cases:
+  //
   // // first line
   // // second line
+  //
   // and:
+  //
   // // first line
   //  // second line
+  //
   // and:
+  //
   // int i; // first line
   //  // second line
+  //
   // and:
+  //
   // do { // first line
   //      // second line
   //   int i;
   // } while (true);
   //
+  // and:
+  //
+  // enum {
+  //   a, // first line
+  //    // second line
+  //   b
+  // };
+  //
   // The second line comment doesn't continue the first in these cases:
+  //
   //   // first line
   //  // second line
+  //
   // and:
+  //
   // int i; // first line
   // // second line
+  //
   // and:
+  //
   // do { // first line
   //   // second line
   //   int i;
   // } while (true);
+  //
+  // and:
+  //
+  // enum {
+  //   a, // first line
+  //   // second line
+  // };
   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
 
   // Scan for '{//'. If found, use the column of '{' as a min column for line
@@ -2103,6 +2139,11 @@
       break;
     }
     PreviousToken = Node.Tok;
+
+    // Grab the last newline preceding a token in this unwrapped line.
+    if (Node.Tok->NewlinesBefore > 0) {
+      MinColumnToken = Node.Tok;
+    }
   }
   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
     MinColumnToken = PreviousToken;
@@ -2130,7 +2171,9 @@
     //
     // FIXME: Consider putting separate line comment sections as children to the
     // unwrapped line instead.
-    if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line))
+    (*I)->ContinuesLineCommentSection =
+        continuesLineComment(**I, *Line, CommentPragmasRegex);
+    if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
       addUnwrappedLine();
     pushToken(*I);
   }
@@ -2196,7 +2239,9 @@
 
     if (!FormatTok->Tok.is(tok::comment))
       return;
-    if (!continuesLineComment(*FormatTok, *Line) &&
+    FormatTok->ContinuesLineCommentSection =
+        continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
+    if (!FormatTok->ContinuesLineCommentSection &&
         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
       CommentsInCurrentLine = false;
     }
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -1213,7 +1213,7 @@
     BreakableToken::Split SplitBefore(StringRef::npos, 0);
     if (ReflowInProgress) {
       SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
-                                          RemainingSpace);
+                                          RemainingSpace, CommentPragmasRegex);
     }
     ReflowInProgress = SplitBefore.first != StringRef::npos;
     unsigned TailOffset =
Index: lib/Format/BreakableToken.h
===================================================================
--- lib/Format/BreakableToken.h
+++ lib/Format/BreakableToken.h
@@ -21,6 +21,7 @@
 #include "Encoding.h"
 #include "TokenAnnotator.h"
 #include "WhitespaceManager.h"
+#include "llvm/Support/Regex.h"
 #include <utility>
 
 namespace clang {
@@ -118,7 +119,8 @@
   /// needs to be reformatted before any breaks are made.
   virtual Split getSplitBefore(unsigned LineIndex,
                                unsigned PreviousEndColumn,
-                               unsigned ColumnLimit) const {
+                               unsigned ColumnLimit,
+                               llvm::Regex& CommentPragmasRegex) const {
     return Split(StringRef::npos, 0);
   }
 
@@ -238,7 +240,8 @@
 
   // Checks if the content of line LineIndex may be reflown with the previous
   // line.
-  bool mayReflow(unsigned LineIndex) const;
+  virtual bool mayReflow(unsigned LineIndex,
+                         llvm::Regex &CommentPragmasRegex) const = 0;
 
   // Contains the original text of the lines of the block comment.
   //
@@ -307,7 +310,8 @@
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
   Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit) const override;
+                       unsigned ColumnLimit,
+                       llvm::Regex &CommentPragmasRegex) const override;
   unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
                                          unsigned TailOffset,
                                          unsigned PreviousEndColumn,
@@ -317,6 +321,8 @@
                                unsigned ColumnLimit,
                                Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
+  bool mayReflow(unsigned LineIndex,
+                 llvm::Regex &CommentPragmasRegex) const override;
 
 private:
   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
@@ -371,15 +377,18 @@
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
   Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit) const override;
+                       unsigned ColumnLimit,
+                       llvm::Regex &CommentPragmasRegex) const override;
   unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset,
                                          unsigned PreviousEndColumn,
                                          unsigned ColumnLimit,
                                          Split SplitBefore) const override;
   void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
                                unsigned ColumnLimit, Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
   void updateNextToken(LineState& State) const override;
+  bool mayReflow(unsigned LineIndex,
+                 llvm::Regex &CommentPragmasRegex) const override;
 
 private:
   unsigned getContentStartColumn(unsigned LineIndex,
Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -321,13 +321,6 @@
          (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
 }
 
-bool BreakableComment::mayReflow(unsigned LineIndex) const {
-  return LineIndex > 0 && mayReflowContent(Content[LineIndex]) &&
-         !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) &&
-         (!Tok.is(TT_LineComment) ||
-          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]);
-}
-
 BreakableBlockComment::BreakableBlockComment(
     const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
@@ -501,8 +494,9 @@
 BreakableToken::Split BreakableBlockComment::getSplitBefore(
     unsigned LineIndex,
     unsigned PreviousEndColumn,
-    unsigned ColumnLimit) const {
-  if (!mayReflow(LineIndex))
+    unsigned ColumnLimit,
+    llvm::Regex &CommentPragmasRegex) const {
+  if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
   StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
   return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
@@ -622,6 +616,19 @@
       InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
 }
 
+bool BreakableBlockComment::mayReflow(unsigned LineIndex,
+                                      llvm::Regex &CommentPragmasRegex) const {
+  // Content[LineIndex] may exclude the indent after the '*' decoration. In that
+  // case, we compute the start of the comment pragma manually.
+  StringRef IndentContent = Content[LineIndex];
+  if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
+    IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
+  }
+  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+         !switchesFormatting(tokenAt(LineIndex));
+}
+
 unsigned
 BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
                                              unsigned TailOffset) const {
@@ -748,10 +755,10 @@
 }
 
 BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
-    unsigned LineIndex,
-    unsigned PreviousEndColumn,
-    unsigned ColumnLimit) const {
-  if (!mayReflow(LineIndex)) return Split(StringRef::npos, 0);
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    llvm::Regex &CommentPragmasRegex) const {
+  if (!mayReflow(LineIndex, CommentPragmasRegex))
+    return Split(StringRef::npos, 0);
   return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
                         ColumnLimit);
 }
@@ -850,6 +857,20 @@
   }
 }
 
+bool BreakableLineCommentSection::mayReflow(
+    unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
+  // Line comments have the indent as part of the prefix, so we need to
+  // recompute the start of the line.
+  StringRef IndentContent = Content[LineIndex];
+  if (Lines[LineIndex].startswith("//")) {
+    IndentContent = Lines[LineIndex].substr(2);
+  }
+  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+         !switchesFormatting(tokenAt(LineIndex)) &&
+         OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
+}
+
 unsigned
 BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
                                                    unsigned TailOffset) const {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D29450: [clang-format] Don't reflow across comment pragmas.

Reply via email to