https://github.com/hnakamura5 updated https://github.com/llvm/llvm-project/pull/78032
>From d0767350f26215e86dee039427183630b3f02668 Mon Sep 17 00:00:00 2001 From: hnakamura5 <hnakamu...@outlook.com> Date: Sat, 13 Jan 2024 21:44:34 +0900 Subject: [PATCH 1/2] [clang-format] TableGen multi line string support. --- clang/lib/Format/ContinuationIndenter.cpp | 3 + clang/lib/Format/FormatToken.h | 1 + clang/lib/Format/FormatTokenLexer.cpp | 57 +++++++++++++++++++ clang/lib/Format/FormatTokenLexer.h | 3 + clang/lib/Format/TokenAnnotator.cpp | 2 +- clang/unittests/Format/TokenAnnotatorTest.cpp | 5 ++ 6 files changed, 70 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 102504182c4505..e6eaaa9ab45706 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.StartOfStringLiteral = State.Column + 1; if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column + 1; + } else if (Current.is(TT_TableGenMultiLineString) && + State.StartOfStringLiteral == 0) { + State.StartOfStringLiteral = State.Column + 1; } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) { State.StartOfStringLiteral = State.Column; } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index d5ef627f1348d3..dede89f2600150 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -148,6 +148,7 @@ namespace format { TYPE(StructLBrace) \ TYPE(StructRBrace) \ TYPE(StructuredBindingLSquare) \ + TYPE(TableGenMultiLineString) \ TYPE(TemplateCloser) \ TYPE(TemplateOpener) \ TYPE(TemplateString) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index a1fd6dd6effe6c..1060009bdcf131 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { // string literals are correctly identified. handleCSharpVerbatimAndInterpolatedStrings(); } + if (Style.isTableGen()) + handleTableGenMultilineString(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); @@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; } } + if (Style.isTableGen()) { + if (tryMergeTokens({tok::l_square, tok::l_brace}, + TT_TableGenMultiLineString)) { + // Multi line string starts with [{ + Tokens.back()->Tok.setKind(tok::string_literal); + return; + } + } } bool FormatTokenLexer::tryMergeNSStringLiteral() { @@ -763,6 +773,53 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1))); } +void FormatTokenLexer::handleTableGenMultilineString() { + FormatToken *MultiLineString = Tokens.back(); + if (MultiLineString->isNot(TT_TableGenMultiLineString)) + return; + + bool PrevIsRBrace = false; + const char *FirstBreak = nullptr; + const char *LastBreak = nullptr; + const char *Begin = MultiLineString->TokenText.begin(); + // Skip until }], the closer of multi line string found. + for (const char *Current = Begin, *End = Lex->getBuffer().end(); + Current != End; ++Current) { + if (PrevIsRBrace && *Current == ']') { + // }] is the end of multi line string. + if (!FirstBreak) + FirstBreak = Current; + MultiLineString->TokenText = StringRef(Begin, Current - Begin + 1); + // ColumnWidth is only the width of the first line. + MultiLineString->ColumnWidth = encoding::columnWidthWithTabs( + StringRef(Begin, FirstBreak - Begin + 1), + MultiLineString->OriginalColumn, Style.TabWidth, Encoding); + if (LastBreak) { + // Set LastLineColumnWidth if multi line string has multiple lines. + MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs( + StringRef(LastBreak + 1, Current - LastBreak), + MultiLineString->OriginalColumn, Style.TabWidth, Encoding); + } + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Current + 1))); + return; + } + PrevIsRBrace = false; + if (*Current == '\n') { + MultiLineString->IsMultiline = true; + // Assure LastBreak is not equal to FirstBreak. + if (!FirstBreak) + FirstBreak = Current; + LastBreak = Current; + continue; + } + if (*Current == '}') { + // Memorize '}'. If next character is ']', they are the closer. + PrevIsRBrace = true; + continue; + } + } +} + void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index bb6a8ab69c1be1..1dec6bbc41514c 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -95,6 +95,9 @@ class FormatTokenLexer { void handleCSharpVerbatimAndInterpolatedStrings(); + // Handles TableGen multiline strings. It has the form [{ ... }]. + void handleTableGenMultilineString(); + void tryParsePythonComment(); bool tryMerge_TMacro(); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 24ce18a64348c1..661118970336a2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1710,7 +1710,7 @@ class AnnotatingParser { TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_BracedListLBrace)) { + TT_BracedListLBrace, TT_TableGenMultiLineString)) { CurrentToken->setType(TT_Unknown); } CurrentToken->Role.reset(); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 92f57a77cdaf01..5ca6a76f840bdf 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2193,6 +2193,11 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0])); ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def)); ASSERT_TRUE(Tokens[1]->is(TT_StartOfName)); + + // Code, the multiline string token. + Tokens = Annotate("[{ code is multiline string }]"); + ASSERT_EQ(Tokens.size(), 2u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { >From bba0a09813b68afa5e7d0eb90da71d7e7453456b Mon Sep 17 00:00:00 2001 From: hnakamura5 <hnakamu...@outlook.com> Date: Sun, 14 Jan 2024 14:31:23 +0900 Subject: [PATCH 2/2] Fixed the reviewed points. --- clang/lib/Format/FormatToken.h | 1 + clang/lib/Format/FormatTokenLexer.cpp | 78 ++++++++----------- clang/lib/Format/TokenAnnotator.cpp | 2 +- clang/unittests/Format/TokenAnnotatorTest.cpp | 11 +++ 4 files changed, 45 insertions(+), 47 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index dede89f2600150..666245f401a204 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -434,6 +434,7 @@ struct FormatToken { setType(T); } bool isTypeFinalized() const { return TypeIsFinalized; } + void setTypeIsFinalized() { TypeIsFinalized = true; } /// Used to set an operator precedence explicitly. prec::Level ForcedPrecedence = prec::Unknown; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 1060009bdcf131..cf8d4193029cb4 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -274,13 +274,13 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; } } - if (Style.isTableGen()) { - if (tryMergeTokens({tok::l_square, tok::l_brace}, - TT_TableGenMultiLineString)) { - // Multi line string starts with [{ - Tokens.back()->Tok.setKind(tok::string_literal); - return; - } + // TableGen's Multi line string starts with [{ + if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace}, + TT_TableGenMultiLineString)) { + // This must never be annotated as other types. + Tokens.back()->setTypeIsFinalized(); + Tokens.back()->Tok.setKind(tok::string_literal); + return; } } @@ -778,45 +778,31 @@ void FormatTokenLexer::handleTableGenMultilineString() { if (MultiLineString->isNot(TT_TableGenMultiLineString)) return; - bool PrevIsRBrace = false; - const char *FirstBreak = nullptr; - const char *LastBreak = nullptr; - const char *Begin = MultiLineString->TokenText.begin(); - // Skip until }], the closer of multi line string found. - for (const char *Current = Begin, *End = Lex->getBuffer().end(); - Current != End; ++Current) { - if (PrevIsRBrace && *Current == ']') { - // }] is the end of multi line string. - if (!FirstBreak) - FirstBreak = Current; - MultiLineString->TokenText = StringRef(Begin, Current - Begin + 1); - // ColumnWidth is only the width of the first line. - MultiLineString->ColumnWidth = encoding::columnWidthWithTabs( - StringRef(Begin, FirstBreak - Begin + 1), - MultiLineString->OriginalColumn, Style.TabWidth, Encoding); - if (LastBreak) { - // Set LastLineColumnWidth if multi line string has multiple lines. - MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs( - StringRef(LastBreak + 1, Current - LastBreak), - MultiLineString->OriginalColumn, Style.TabWidth, Encoding); - } - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Current + 1))); - return; - } - PrevIsRBrace = false; - if (*Current == '\n') { - MultiLineString->IsMultiline = true; - // Assure LastBreak is not equal to FirstBreak. - if (!FirstBreak) - FirstBreak = Current; - LastBreak = Current; - continue; - } - if (*Current == '}') { - // Memorize '}'. If next character is ']', they are the closer. - PrevIsRBrace = true; - continue; - } + auto OpenOffset = Lex->getCurrentBufferOffset() - 2 /* "[{" */; + // "}]" is the end of multi line string. + auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset); + if (CloseOffset == StringRef::npos) + return; + auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset + 2); + MultiLineString->TokenText = Text; + resetLexer(SourceMgr.getFileOffset( + Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size()))); + // Set ColumnWidth and LastLineColumnWidth. + auto FirstLineText = Text; + auto FirstBreak = Text.find('\n'); + if (FirstBreak != StringRef::npos) { + MultiLineString->IsMultiline = true; + FirstLineText = Text.substr(0, FirstBreak + 1); + } + // ColumnWidth holds only the width of the first line. + MultiLineString->ColumnWidth = encoding::columnWidthWithTabs( + FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding); + auto LastBreak = Text.rfind('\n'); + if (LastBreak != StringRef::npos) { + // Set LastLineColumnWidth if it has multiple lines. + MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs( + Text.substr(LastBreak + 1, Text.size()), + MultiLineString->OriginalColumn, Style.TabWidth, Encoding); } } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 661118970336a2..24ce18a64348c1 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1710,7 +1710,7 @@ class AnnotatingParser { TT_UnionLBrace, TT_RequiresClause, TT_RequiresClauseInARequiresExpression, TT_RequiresExpression, TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace, - TT_BracedListLBrace, TT_TableGenMultiLineString)) { + TT_BracedListLBrace)) { CurrentToken->setType(TT_Unknown); } CurrentToken->Role.reset(); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 5ca6a76f840bdf..117d8fe8f7dc12 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2198,6 +2198,17 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { Tokens = Annotate("[{ code is multiline string }]"); ASSERT_EQ(Tokens.size(), 2u) << Tokens; EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); + EXPECT_FALSE(Tokens[0]->IsMultiline); + // Case with multiple lines. + Tokens = Annotate("[{ It can break\n" + " across lines and the line breaks\n" + " are retained in \n" + " the string. }]"); + ASSERT_EQ(Tokens.size(), 2u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); + EXPECT_EQ(Tokens[0]->ColumnWidth, sizeof("[{ It can break\n") - 1); + EXPECT_TRUE(Tokens[0]->IsMultiline); + EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits