klimek created this revision. When we break a long line like: Column limit: 21
| // foo foo foo foo foo foo foo foo foo foo foo foo The local decision when to allow protruding vs. breaking can lead to this outcome (2 excess characters, 2 breaks): // foo foo foo foo foo // foo foo foo foo foo // foo foo While strictly staying within the column limit leads to this strictly better outcome (fully below the column limit, 2 breaks): // foo foo foo foo // foo foo foo foo // foo foo foo foo To get an optimal solution, we would need to consider all combinations of excess characters vs. breaking for all lines, but that would lead to a significant increase in the search space of the algorithm for little gain. Instead, we blindly try both approches and·select the one that leads to the overall lower penalty. Repository: rC Clang https://reviews.llvm.org/D40605 Files: lib/Format/ContinuationIndenter.cpp lib/Format/ContinuationIndenter.h unittests/Format/FormatTest.cpp
Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -9934,8 +9934,8 @@ Style.PenaltyExcessCharacter = 90; verifyFormat("int a; // the comment", Style); EXPECT_EQ("int a; // the comment\n" - " // aa", - format("int a; // the comment aa", Style)); + " // aaa", + format("int a; // the comment aaa", Style)); EXPECT_EQ("int a; /* first line\n" " * second line\n" " * third line\n" @@ -9963,14 +9963,14 @@ Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", format("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", Style)); EXPECT_EQ("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", format("// foo bar baz bazfoo\n" - "// foo bar\n", + "// foo bar foo bar\n", Style)); // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the @@ -9996,6 +9996,15 @@ "// foo bar baz bazfoo bar\n" "// foo bar\n", Style)); + + // Make sure we do not keep protruding characters if strict mode reflow is + // cheaper than keeping protruding characters. + Style.ColumnLimit = 21; + EXPECT_EQ("// foo foo foo foo\n" + "// foo foo foo foo\n" + "// foo foo foo foo\n", + format("// foo foo foo foo foo foo foo foo foo foo foo foo\n", + Style)); } #define EXPECT_ALL_STYLES_EQUAL(Styles) \ Index: lib/Format/ContinuationIndenter.h =================================================================== --- lib/Format/ContinuationIndenter.h +++ lib/Format/ContinuationIndenter.h @@ -123,14 +123,25 @@ /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// - /// \returns An extra penalty if a token was broken, otherwise 0. + /// \returns A pair (penalty, exceeded), where penalty is the extra penalty + /// when tokens are broken or lines exceed the column limit, and exceeded + /// indicates whether the algorithm purposefully left lines exceeding the + /// column limit. /// /// The returned penalty will cover the cost of the additional line breaks /// and column limit violation in all lines except for the last one. The /// penalty for the column limit violation in the last line (and in single /// line tokens) is handled in \c addNextStateToQueue. - unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, - bool AllowBreak, bool DryRun); + /// + /// \p Strict indicates whether reflowing is allowed to leave characters + /// protruding the column limit; if true, lines will be split strictly within + /// the column limit where possible; if false, words are allowed to protrude + /// over the column limit as long as the penalty is less than the penalty + /// of a break. + std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool AllowBreak, bool DryRun, + bool Strict); /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr /// if the current token cannot be broken. Index: lib/Format/ContinuationIndenter.cpp =================================================================== --- lib/Format/ContinuationIndenter.cpp +++ lib/Format/ContinuationIndenter.cpp @@ -1390,7 +1390,36 @@ Penalty = addMultilineToken(Current, State); } else if (State.Line->Type != LT_ImportStatement) { // We generally don't break import statements. - Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun); + LineState OriginalState = State; + + // Whether we force the reflowing algorithm to stay strictly within the + // column limit. + bool Strict = false; + // Whether the first non-strict attempt at reflowing did intentionally + // exceed the column limit. + bool Exceeded = false; + std::tie(Penalty, Exceeded) = breakProtrudingToken( + Current, State, AllowBreak, /*DryRun=*/true, Strict); + if (Exceeded) { + // If non-strict reflowing exceeds the column limit, try whether strict + // reflowing leads to an overall lower penalty. + LineState StrictState = OriginalState; + unsigned StrictPenalty = + breakProtrudingToken(Current, StrictState, AllowBreak, + /*DryRun=*/true, /*Strict=*/true) + .first; + Strict = StrictPenalty <= Penalty; + if (Strict) { + Penalty = StrictPenalty; + State = StrictState; + } + } + if (!DryRun) { + // If we're not in dry-run mode, apply the changes with the decision on + // strictness made above. + breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false, + Strict); + } } if (State.Column > getColumnLimit(State)) { unsigned ExcessCharacters = State.Column - getColumnLimit(State); @@ -1480,28 +1509,31 @@ return nullptr; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool AllowBreak, - bool DryRun) { +std::pair<unsigned, bool> +ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, bool AllowBreak, + bool DryRun, bool Strict) { std::unique_ptr<const BreakableToken> Token = createBreakableToken(Current, State, AllowBreak); if (!Token) - return 0; + return {0, true}; assert(Token->getLineCount() > 0); unsigned ColumnLimit = getColumnLimit(State); if (Current.is(TT_LineComment)) { // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; } if (Current.UnbreakableTailLength >= ColumnLimit) - return 0; + return {0, true}; // ColumnWidth was already accounted into State.Column before calling // breakProtrudingToken. unsigned StartColumn = State.Column - Current.ColumnWidth; unsigned NewBreakPenalty = Current.isStringLiteral() ? Style.PenaltyBreakString : Style.PenaltyBreakComment; + // Stores whether we intentionally decide to let a line exceed the column + // limit. + bool Exceeded = false; // Stores whether we introduce a break anywhere in the token. bool BreakInserted = Token->introducesBreakBeforeToken(); // Store whether we inserted a new line break at the end of the previous @@ -1612,17 +1644,19 @@ bool ContinueOnLine = ContentStartColumn + ToNextSplitColumns <= ColumnLimit; unsigned ExcessCharactersPenalty = 0; - if (!ContinueOnLine) { + if (!ContinueOnLine && !Strict) { // Similarly, if the excess characters' penalty is lower than the // penalty of introducing a new break, continue on the current line. ExcessCharactersPenalty = (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * Style.PenaltyExcessCharacter; DEBUG(llvm::dbgs() << " Penalty excess: " << ExcessCharactersPenalty << "\n break : " << NewBreakPenalty << "\n"); - if (ExcessCharactersPenalty < NewBreakPenalty) + if (ExcessCharactersPenalty < NewBreakPenalty) { + Exceeded = true; ContinueOnLine = true; + } } if (ContinueOnLine) { DEBUG(llvm::dbgs() << " Continuing on line...\n"); @@ -1817,7 +1851,7 @@ Token->updateNextToken(State); - return Penalty; + return {Penalty, Exceeded}; } unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits